import os
import tensorflow as tf

# If there are multiple GPUs and we only want to use one/some, set the number
# in the visible device list.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# This sets the GPU to allocate memory only as needed (memory growth) instead
# of reserving the whole device up front.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    # FIX: this call must be indented inside the `if`; in the original it was
    # at top level, which is a SyntaxError (and would crash on CPU-only hosts).
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
2022-02-24 10:21:14.568990: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1 2022-02-24 10:21:15.647535: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set 2022-02-24 10:21:15.650452: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1 2022-02-24 10:21:15.712081: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-24 10:21:15.712368: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2080 SUPER computeCapability: 7.5 coreClock: 1.815GHz coreCount: 48 deviceMemorySize: 7.77GiB deviceMemoryBandwidth: 462.00GiB/s 2022-02-24 10:21:15.712384: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1 2022-02-24 10:21:15.729511: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10 2022-02-24 10:21:15.729551: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10 2022-02-24 10:21:15.738896: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10 2022-02-24 10:21:15.741076: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10 2022-02-24 10:21:15.757423: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.10 2022-02-24 10:21:15.759745: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.10 2022-02-24 10:21:15.788549: I 
tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7 2022-02-24 10:21:15.788608: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-24 10:21:15.788835: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-24 10:21:15.789015: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0
This assignment will focus on the CIFAR10 dataset. This is a collection of small images in 10 classes such as cars, cats, birds, etc. You can find more information here: https://www.cs.toronto.edu/~kriz/cifar.html. We start by loading and examining the data.
import numpy as np
from tensorflow.keras.datasets import cifar10

# Download (if necessary) and load the CIFAR10 train/test split.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Report the array dimensions of each split.
for split_name, images, labels in (("training", X_train, y_train),
                                   ("test", X_test, y_test)):
    print("Shape of %s data:" % split_name)
    print(images.shape)
    print(labels.shape)
Shape of training data: (50000, 32, 32, 3) (50000, 1) Shape of test data: (10000, 32, 32, 3) (10000, 1)
The shape of X_train and X_test has 4 values. What do each of these represent?
This plots a random selection of images from each class. Rerun the cell to see a different selection.
from Custom import PlotRandomFromEachClass
# Human-readable names for the 10 CIFAR10 class indices (0-9, in this order).
cifar_labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Plot 3 random training examples from each class (rerun for a new selection).
# NOTE(review): PlotRandomFromEachClass comes from the course's Custom module;
# its exact signature is not visible here.
PlotRandomFromEachClass(X_train, y_train, 3, labels=cifar_labels)
Just like the MNIST dataset we normalize the images to [0,1] and transform the class indices to one-hot encoded vectors.
from tensorflow.keras.utils import to_categorical

# Scale pixel intensities from the [0, 255] byte range down to [0, 1].
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Convert integer class labels (0-9) into one-hot encoded target vectors.
y_train_c = to_categorical(y_train, num_classes=10)
y_test_c = to_categorical(y_test, num_classes=10)
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten
from Custom import PlotModelEval
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization, Activation
from tensorflow.keras.utils import plot_model
We will start by creating a fully connected classifier using the Dense layer. We give you the first layer that flattens the image features to a single vector. Add the remaining layers to the network.
Consider what the size of the output must be and what activation function you should use in the output layer.
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten

# Fully connected classifier for 32x32x3 CIFAR10 images.
x_in = Input(shape=X_train.shape[1:])
# Flatten the (32, 32, 3) image into a single 3072-dimensional feature vector.
x = Flatten()(x_in)
# === Add your code here ===
# Five hidden layers of 64 ReLU units each.
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
# Output layer: one unit per class; softmax yields a probability distribution.
x = Dense(10, activation='softmax')(x)
# ==========================
model = Model(inputs=x_in, outputs=x)
# Now we build the model using Stochastic Gradient Descent with Nesterov momentum. We use accuracy as the metric.
# FIX: `lr` is a deprecated alias in tf.keras -- use `learning_rate` instead.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary(100)
Model: "model" ____________________________________________________________________________________________________ Layer (type) Output Shape Param # ==================================================================================================== input_1 (InputLayer) [(None, 32, 32, 3)] 0 ____________________________________________________________________________________________________ flatten (Flatten) (None, 3072) 0 ____________________________________________________________________________________________________ dense (Dense) (None, 64) 196672 ____________________________________________________________________________________________________ dense_1 (Dense) (None, 64) 4160 ____________________________________________________________________________________________________ dense_2 (Dense) (None, 64) 4160 ____________________________________________________________________________________________________ dense_3 (Dense) (None, 64) 4160 ____________________________________________________________________________________________________ dense_4 (Dense) (None, 64) 4160 ____________________________________________________________________________________________________ dense_5 (Dense) (None, 10) 650 ==================================================================================================== Total params: 213,962 Trainable params: 213,962 Non-trainable params: 0 ____________________________________________________________________________________________________
2022-02-23 12:41:18.290075: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 2022-02-23 12:41:18.290620: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-23 12:41:18.290840: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2080 SUPER computeCapability: 7.5 coreClock: 1.815GHz coreCount: 48 deviceMemorySize: 7.79GiB deviceMemoryBandwidth: 462.00GiB/s 2022-02-23 12:41:18.290862: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1 2022-02-23 12:41:18.290878: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10 2022-02-23 12:41:18.290886: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10 2022-02-23 12:41:18.290893: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10 2022-02-23 12:41:18.290900: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10 2022-02-23 12:41:18.290907: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.10 2022-02-23 12:41:18.290915: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.10 2022-02-23 12:41:18.290922: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened 
dynamic library libcudnn.so.7 2022-02-23 12:41:18.290960: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-23 12:41:18.291170: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-23 12:41:18.291352: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0 2022-02-23 12:41:18.291374: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1 2022-02-23 12:41:18.592300: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1261] Device interconnect StreamExecutor with strength 1 edge matrix: 2022-02-23 12:41:18.592318: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1267] 0 2022-02-23 12:41:18.592321: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1280] 0: N 2022-02-23 12:41:18.592430: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-23 12:41:18.592661: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-23 12:41:18.592864: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-23 12:41:18.593055: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1406] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6792 MB memory) -> physical GPU (device: 0, name: NVIDIA GeForce RTX 2080 SUPER, pci bus id: 0000:01:00.0, compute 
capability: 7.5) 2022-02-23 12:41:18.593240: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
In order to show the differences between models in the first parts of the assignment, we will restrict the training to the following command using 15 epochs, batch size 32, and 20% validation data. From section 5 and forward you can change this as you please to increase the accuracy, but for now stick with this command.
# FIX: the assignment protocol stated above requires batch size 32 for
# sections 1-4 (the original used 128, breaking the model comparison).
history = model.fit(X_train, y_train_c, epochs=15, batch_size=32, verbose=1, validation_split=0.2)
2022-02-23 12:41:24.973991: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2) 2022-02-23 12:41:24.994424: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3699850000 Hz
Epoch 1/15 41/313 [==>...........................] - ETA: 0s - loss: 2.2903 - accuracy: 0.1167
2022-02-23 12:41:25.277806: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
313/313 [==============================] - 1s 2ms/step - loss: 2.1054 - accuracy: 0.2082 - val_loss: 1.8588 - val_accuracy: 0.3131 Epoch 2/15 313/313 [==============================] - 0s 1ms/step - loss: 1.7459 - accuracy: 0.3705 - val_loss: 1.7243 - val_accuracy: 0.3807 Epoch 3/15 313/313 [==============================] - 0s 1ms/step - loss: 1.6493 - accuracy: 0.4085 - val_loss: 1.6879 - val_accuracy: 0.3926 Epoch 4/15 313/313 [==============================] - 0s 1ms/step - loss: 1.5745 - accuracy: 0.4342 - val_loss: 1.6284 - val_accuracy: 0.4196 Epoch 5/15 313/313 [==============================] - 0s 1ms/step - loss: 1.5487 - accuracy: 0.4449 - val_loss: 1.5955 - val_accuracy: 0.4318 Epoch 6/15 313/313 [==============================] - 0s 1ms/step - loss: 1.5206 - accuracy: 0.4554 - val_loss: 1.5443 - val_accuracy: 0.4519 Epoch 7/15 313/313 [==============================] - 0s 2ms/step - loss: 1.4701 - accuracy: 0.4691 - val_loss: 1.5503 - val_accuracy: 0.4445 Epoch 8/15 313/313 [==============================] - 0s 1ms/step - loss: 1.4555 - accuracy: 0.4788 - val_loss: 1.5140 - val_accuracy: 0.4623 Epoch 9/15 313/313 [==============================] - 0s 1ms/step - loss: 1.4247 - accuracy: 0.4900 - val_loss: 1.5220 - val_accuracy: 0.4651 Epoch 10/15 313/313 [==============================] - 0s 1ms/step - loss: 1.4138 - accuracy: 0.4899 - val_loss: 1.4843 - val_accuracy: 0.4733 Epoch 11/15 313/313 [==============================] - 0s 2ms/step - loss: 1.3934 - accuracy: 0.4995 - val_loss: 1.5392 - val_accuracy: 0.4555 Epoch 12/15 313/313 [==============================] - 0s 1ms/step - loss: 1.3634 - accuracy: 0.5131 - val_loss: 1.4952 - val_accuracy: 0.4754 Epoch 13/15 313/313 [==============================] - 0s 1ms/step - loss: 1.3509 - accuracy: 0.5169 - val_loss: 1.4762 - val_accuracy: 0.4770 Epoch 14/15 313/313 [==============================] - 0s 1ms/step - loss: 1.3469 - accuracy: 0.5178 - val_loss: 1.5171 - val_accuracy: 0.4682 Epoch 15/15 
313/313 [==============================] - 0s 1ms/step - loss: 1.3379 - accuracy: 0.5204 - val_loss: 1.4908 - val_accuracy: 0.4770
We use model.evaluate to get the loss and metric scores on the test data. To plot the results we give you a custom function that does the work for you.
# Evaluate loss and accuracy on the held-out test set.
score = model.evaluate(X_test, y_test_c, batch_size=128, verbose=0)
# FIX: the print statement must be indented inside the loop (it lost its
# indentation in the export); zip pairs each metric name with its value
# more clearly than indexing with range(len(...)).
for metric_name, metric_value in zip(model.metrics_names, score):
    print("Test " + metric_name + " = %.3f" % metric_value)
Test loss = 1.463 Test accuracy = 0.484
from Custom import PlotModelEval
# Custom function for evaluating the model and plotting training history
PlotModelEval(model, history, X_test, y_test, cifar_labels)
Train a model that achieves above 45% accuracy on the test data. In the report, provide a (short) description of your model and show the evaluation image.
Compare this model to the one you used for the MNIST dataset in the first assignment, in terms of size and test accuracy. Why do you think this dataset is much harder to classify than the MNIST handwritten digits?
We will now move on to a network architecture that is more suited for this problem, the convolutional neural network. The new layers you will use are Conv2D and MaxPooling2D, which you can find the documentation of here https://www.tensorflow.org/api_docs/python/tf/keras/layers/Conv2D and here https://www.tensorflow.org/api_docs/python/tf/keras/layers/MaxPool2D.
A common way to build convolutional neural networks is to create blocks of layers of the form [convolution - activation - pooling], and then stack several of these blocks to create the full convolution stack. This is often followed by a fully connected network to create the output classes. Use this recipe to build a CNN that achieves at least 62% accuracy on the test data.
Side note. Although this is a common way to build CNNs, it is by no means the only or even best way. It is a good starting point, but later in part 5 you might want to explore other architectures to achieve even better performance.
from tensorflow.keras.layers import Conv2D, MaxPooling2D

# CNN built from three [conv -> relu -> max-pool] blocks, followed by a small
# fully connected classifier head.
x_in = Input(shape=X_train.shape[1:])
# === Add your code here ===
x = Conv2D(32, kernel_size=(3, 3), activation="relu")(x_in)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, kernel_size=(3, 3), activation="relu")(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, kernel_size=(3, 3), activation="relu")(x)
x = MaxPooling2D((2, 2))(x)
# Flatten the final (2, 2, 64) feature maps into a 256-dimensional vector.
x = Flatten()(x)
x = Dense(64, activation="tanh")(x)
x = Dense(10, activation="softmax")(x)
# ==========================
model = Model(inputs=x_in, outputs=x)
# FIX: `lr` is a deprecated alias in tf.keras -- use `learning_rate` instead.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=sgd)
model.summary(100)
Model: "model_1" ____________________________________________________________________________________________________ Layer (type) Output Shape Param # ==================================================================================================== input_2 (InputLayer) [(None, 32, 32, 3)] 0 ____________________________________________________________________________________________________ conv2d (Conv2D) (None, 30, 30, 32) 896 ____________________________________________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 15, 15, 32) 0 ____________________________________________________________________________________________________ conv2d_1 (Conv2D) (None, 13, 13, 64) 18496 ____________________________________________________________________________________________________ max_pooling2d_1 (MaxPooling2D) (None, 6, 6, 64) 0 ____________________________________________________________________________________________________ conv2d_2 (Conv2D) (None, 4, 4, 64) 36928 ____________________________________________________________________________________________________ max_pooling2d_2 (MaxPooling2D) (None, 2, 2, 64) 0 ____________________________________________________________________________________________________ flatten_1 (Flatten) (None, 256) 0 ____________________________________________________________________________________________________ dense_6 (Dense) (None, 64) 16448 ____________________________________________________________________________________________________ dense_7 (Dense) (None, 10) 650 ==================================================================================================== Total params: 73,418 Trainable params: 73,418 Non-trainable params: 0 ____________________________________________________________________________________________________
# FIX: the assignment protocol requires batch size 32 for sections 1-4
# (the original used 64, which breaks the model comparison).
history = model.fit(X_train, y_train_c, batch_size=32, epochs=15, verbose=1, validation_split=0.2)
Epoch 1/15
2022-02-23 12:41:47.750471: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7 2022-02-23 12:41:48.283858: W tensorflow/stream_executor/gpu/asm_compiler.cc:63] Running ptxas --version returned 256 2022-02-23 12:41:48.325149: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: ptxas exited with non-zero error code 256, output: Relying on driver to perform ptx compilation. Modify $PATH to customize ptxas location. This message will be only logged once.
625/625 [==============================] - 8s 5ms/step - loss: 2.0235 - accuracy: 0.2486 - val_loss: 1.4551 - val_accuracy: 0.4748 Epoch 2/15 625/625 [==============================] - 1s 2ms/step - loss: 1.3916 - accuracy: 0.5001 - val_loss: 1.2282 - val_accuracy: 0.5628 Epoch 3/15 625/625 [==============================] - 1s 2ms/step - loss: 1.1706 - accuracy: 0.5851 - val_loss: 1.1070 - val_accuracy: 0.6095 Epoch 4/15 625/625 [==============================] - 1s 2ms/step - loss: 1.0531 - accuracy: 0.6268 - val_loss: 1.0333 - val_accuracy: 0.6361 Epoch 5/15 625/625 [==============================] - 1s 2ms/step - loss: 0.9448 - accuracy: 0.6697 - val_loss: 0.9923 - val_accuracy: 0.6540 Epoch 6/15 625/625 [==============================] - 1s 2ms/step - loss: 0.8754 - accuracy: 0.6908 - val_loss: 0.9765 - val_accuracy: 0.6604 Epoch 7/15 625/625 [==============================] - 1s 2ms/step - loss: 0.8120 - accuracy: 0.7185 - val_loss: 0.9176 - val_accuracy: 0.6838 Epoch 8/15 625/625 [==============================] - 1s 2ms/step - loss: 0.7537 - accuracy: 0.7376 - val_loss: 0.9171 - val_accuracy: 0.6879 Epoch 9/15 625/625 [==============================] - 1s 2ms/step - loss: 0.6936 - accuracy: 0.7581 - val_loss: 0.9264 - val_accuracy: 0.6908 Epoch 10/15 625/625 [==============================] - 1s 2ms/step - loss: 0.6407 - accuracy: 0.7767 - val_loss: 0.9163 - val_accuracy: 0.6868 Epoch 11/15 625/625 [==============================] - 1s 2ms/step - loss: 0.6028 - accuracy: 0.7911 - val_loss: 0.9113 - val_accuracy: 0.6973 Epoch 12/15 625/625 [==============================] - 1s 2ms/step - loss: 0.5692 - accuracy: 0.8010 - val_loss: 0.9027 - val_accuracy: 0.7020 Epoch 13/15 625/625 [==============================] - 1s 2ms/step - loss: 0.5218 - accuracy: 0.8181 - val_loss: 0.9552 - val_accuracy: 0.6990 Epoch 14/15 625/625 [==============================] - 1s 2ms/step - loss: 0.4894 - accuracy: 0.8314 - val_loss: 0.9328 - val_accuracy: 0.7083 Epoch 15/15 
625/625 [==============================] - 1s 2ms/step - loss: 0.4583 - accuracy: 0.8389 - val_loss: 0.9465 - val_accuracy: 0.7005
# Evaluate loss and accuracy on the held-out test set.
score = model.evaluate(X_test, y_test_c, batch_size=128, verbose=0)
# FIX: the print statement must be indented inside the loop (it lost its
# indentation in the export); zip over (name, value) pairs is clearer.
for metric_name, metric_value in zip(model.metrics_names, score):
    print("Test " + metric_name + " = %.3f" % metric_value)
Test loss = 0.958 Test accuracy = 0.693
# Plot evaluation results and training history for the CNN model
# (custom helper from the course's Custom module).
PlotModelEval(model, history, X_test, y_test, cifar_labels)
Train a model that achieves at least 62% test accuracy. In the report, provide a (short) description of your model and show the evaluation image.
Compare this model with the previous fully connected model. You should find that this one is much more efficient, i.e. achieves higher accuracy with fewer parameters. Explain in your own words how this is possible.
You have probably seen that your CNN model overfits the training data. One way to prevent this is to add Dropout layers to the model, which randomly "drop" hidden nodes each training iteration by setting their output to zero. Thus the model cannot rely on a small set of very good hidden features, but must instead learn to use different sets of hidden features each time. Dropout layers are usually added after the pooling layers in the convolution part of the model, or after activations in the fully connected part of the model.
Side note. In the next assignment you will work with Ensemble models, a way to use the output from several individual models to achieve higher performance than each model can achieve on its own. One way to interpret Dropout is that each random selection of nodes is a separate model that is trained only on the current iteration. The final output is then the average of outputs from all the individual models. In other words, Dropout can be seen as a way to build ensembling directly into the network, without having to train several models explicitly.
Extend your previous model with the Dropout layer and test the new performance.
from tensorflow.keras.layers import Dropout

# Same CNN as before, extended with Dropout layers after each pooling layer
# (rate 0.25) and after the dense hidden activation (rate 0.1) to reduce
# overfitting.
x_in = Input(shape=X_train.shape[1:])
# === Add your code here ===
x = Conv2D(32, kernel_size=(3, 3), activation="relu")(x_in)
x = MaxPooling2D((2, 2))(x)
x = Dropout(0.25)(x)
x = Conv2D(64, kernel_size=(3, 3), activation="relu")(x)
x = MaxPooling2D((2, 2))(x)
x = Dropout(0.25)(x)
x = Conv2D(64, kernel_size=(3, 3), activation="relu")(x)
x = MaxPooling2D((2, 2))(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
x = Dense(64, activation="tanh")(x)
x = Dropout(0.1)(x)
x = Dense(10, activation="softmax")(x)
# ==========================
model = Model(inputs=x_in, outputs=x)
# Compile model
# FIX: `lr` is a deprecated alias in tf.keras -- use `learning_rate` instead.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=sgd)
model.summary(100)
Model: "model_7" ____________________________________________________________________________________________________ Layer (type) Output Shape Param # ==================================================================================================== input_8 (InputLayer) [(None, 32, 32, 3)] 0 ____________________________________________________________________________________________________ conv2d_18 (Conv2D) (None, 30, 30, 32) 896 ____________________________________________________________________________________________________ max_pooling2d_18 (MaxPooling2D) (None, 15, 15, 32) 0 ____________________________________________________________________________________________________ dropout_19 (Dropout) (None, 15, 15, 32) 0 ____________________________________________________________________________________________________ conv2d_19 (Conv2D) (None, 13, 13, 64) 18496 ____________________________________________________________________________________________________ max_pooling2d_19 (MaxPooling2D) (None, 6, 6, 64) 0 ____________________________________________________________________________________________________ dropout_20 (Dropout) (None, 6, 6, 64) 0 ____________________________________________________________________________________________________ conv2d_20 (Conv2D) (None, 4, 4, 64) 36928 ____________________________________________________________________________________________________ max_pooling2d_20 (MaxPooling2D) (None, 2, 2, 64) 0 ____________________________________________________________________________________________________ dropout_21 (Dropout) (None, 2, 2, 64) 0 ____________________________________________________________________________________________________ flatten_7 (Flatten) (None, 256) 0 ____________________________________________________________________________________________________ dense_18 (Dense) (None, 64) 16448 ____________________________________________________________________________________________________ dropout_22 
(Dropout) (None, 64) 0 ____________________________________________________________________________________________________ dense_19 (Dense) (None, 10) 650 ==================================================================================================== Total params: 73,418 Trainable params: 73,418 Non-trainable params: 0 ____________________________________________________________________________________________________
# FIX: the assignment protocol requires batch size 32 for sections 1-4
# (the original used 64, which breaks the model comparison).
history = model.fit(X_train, y_train_c, batch_size=32, epochs=15, verbose=1, validation_split=0.2)
Epoch 1/15 625/625 [==============================] - 1s 2ms/step - loss: 2.1010 - accuracy: 0.2096 - val_loss: 1.7158 - val_accuracy: 0.3761 Epoch 2/15 625/625 [==============================] - 1s 2ms/step - loss: 1.5912 - accuracy: 0.4181 - val_loss: 1.3872 - val_accuracy: 0.4978 Epoch 3/15 625/625 [==============================] - 1s 2ms/step - loss: 1.4126 - accuracy: 0.4847 - val_loss: 1.2469 - val_accuracy: 0.5586 Epoch 4/15 625/625 [==============================] - 1s 2ms/step - loss: 1.3120 - accuracy: 0.5264 - val_loss: 1.1806 - val_accuracy: 0.5818 Epoch 5/15 625/625 [==============================] - 1s 2ms/step - loss: 1.2491 - accuracy: 0.5526 - val_loss: 1.1797 - val_accuracy: 0.5676 Epoch 6/15 625/625 [==============================] - 1s 2ms/step - loss: 1.1974 - accuracy: 0.5736 - val_loss: 1.0831 - val_accuracy: 0.6188 Epoch 7/15 625/625 [==============================] - 1s 2ms/step - loss: 1.1613 - accuracy: 0.5836 - val_loss: 1.0456 - val_accuracy: 0.6357 Epoch 8/15 625/625 [==============================] - 1s 2ms/step - loss: 1.1263 - accuracy: 0.5980 - val_loss: 1.0189 - val_accuracy: 0.6455 Epoch 9/15 625/625 [==============================] - 1s 2ms/step - loss: 1.0832 - accuracy: 0.6145 - val_loss: 0.9940 - val_accuracy: 0.6430 Epoch 10/15 625/625 [==============================] - 1s 2ms/step - loss: 1.0684 - accuracy: 0.6233 - val_loss: 0.9526 - val_accuracy: 0.6684 Epoch 11/15 625/625 [==============================] - 1s 2ms/step - loss: 1.0393 - accuracy: 0.6355 - val_loss: 0.9771 - val_accuracy: 0.6581 Epoch 12/15 625/625 [==============================] - 1s 2ms/step - loss: 1.0235 - accuracy: 0.6379 - val_loss: 0.9219 - val_accuracy: 0.6832 Epoch 13/15 625/625 [==============================] - 1s 2ms/step - loss: 0.9976 - accuracy: 0.6459 - val_loss: 0.8806 - val_accuracy: 0.6918 Epoch 14/15 625/625 [==============================] - 1s 2ms/step - loss: 0.9885 - accuracy: 0.6500 - val_loss: 0.9099 - val_accuracy: 0.6833 Epoch 
15/15 625/625 [==============================] - 1s 2ms/step - loss: 0.9707 - accuracy: 0.6571 - val_loss: 0.8804 - val_accuracy: 0.6919
# Evaluate loss and accuracy on the held-out test set.
score = model.evaluate(X_test, y_test_c, batch_size=128, verbose=0)
# FIX: the print statement must be indented inside the loop (it lost its
# indentation in the export); zip over (name, value) pairs is clearer.
for metric_name, metric_value in zip(model.metrics_names, score):
    print("Test " + metric_name + " = %.3f" % metric_value)
Test loss = 0.895 Test accuracy = 0.694
# Plot evaluation results and training history for the dropout-regularized
# CNN (custom helper from the course's Custom module).
PlotModelEval(model, history, X_test, y_test, cifar_labels)
Train the modified CNN-model. Save the evaluation image for the report.
Compare this model and the previous in terms of the training accuracy, validation accuracy, and test accuracy. Explain the similarities and differences (remember that the only difference between the models should be the addition of Dropout layers).
Hint: what does the dropout layer do at test time?
The final layer we will explore is BatchNormalization. As the name suggests, this layer normalizes the data in each batch to have a specific mean and standard deviation, which is learned during training. The reason for this is quite complicated (and still debated among the experts), but suffice to say that it helps the optimization converge faster which means we get higher performance in fewer epochs. The normalization is done separately for each feature, i.e. the statistics are calculated across the batch dimension of the input data. The equations for batch-normalizing one feature are the following, where $N$ is the batch size, $x$ the input features, and $y$ the normalized output features:
At first glance this might look intimidating, but all it means is that we begin by scaling and shifting the data to have mean $\mu=0$ and standard deviation $\sigma=1$. After this we use the learnable parameters $\gamma$ and $\beta$ to decide the width and center of the final distribution. $\epsilon$ is a small constant value that prevents the denominator from being zero.
In addition to learning the parameters $\gamma$ and $\beta$ by gradient descent just like the weights, Batch Normalization also keeps track of the running average of minibatch statistics $\mu$ and $\sigma$. These averages are used to normalize the test data. We can tune the rate at which the running averages are updated with the momentum parameter of the BatchNormalization layer. A large momentum means that the statistics converge more slowly and therefore require more updates before they represent the data. A low momentum, on the other hand, adapts to the data more quickly but might lead to unstable behaviour if the latest minibatches are not representative of the whole dataset. For this test we recommend a momentum of 0.75, but you probably want to change this when you design a larger network in Section 5.
The batch normalization layer should be added after the hidden layer linear transformation, but before the nonlinear activation. This means that we cannot specify the activation function in the Conv2D or Dense if we want to batch-normalize the output. We therefore need to use the Activation layer to add a separate activation to the network stack after batch normalization. For example, the convolution block will now look like [conv - batchnorm - activation - pooling].
Extend your previous model with batch normalization, both in the convolution and fully connected part of the model.
from tensorflow.keras.layers import BatchNormalization, Activation

x_in = Input(shape=X_train.shape[1:])
# === Add your code here ===
# Each convolution block is [conv - batchnorm - activation - pooling]: the
# activation is applied as a separate layer so BatchNormalization can sit
# between the linear transformation and the nonlinearity.
x = Conv2D(32, kernel_size=(3, 3))(x_in)
x = BatchNormalization(momentum=0.75)(x)
x = Activation("relu")(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, kernel_size=(3, 3))(x)
x = BatchNormalization(momentum=0.75)(x)
x = Activation("relu")(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, kernel_size=(3, 3))(x)
x = BatchNormalization(momentum=0.75)(x)
x = Activation("relu")(x)
x = MaxPooling2D((2, 2))(x)
# Classifier head: flatten, one hidden Dense layer, softmax over 10 classes.
x = Flatten()(x)
x = Dense(64, activation="tanh")(x)
x = Dense(10, activation="softmax")(x)
# ==========================
model = Model(inputs=x_in, outputs=x)
# `lr` is deprecated in tf.keras optimizers; `learning_rate` is the supported name.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=sgd)
model.summary(100)
Model: "model_13" ____________________________________________________________________________________________________ Layer (type) Output Shape Param # ==================================================================================================== input_14 (InputLayer) [(None, 32, 32, 3)] 0 ____________________________________________________________________________________________________ conv2d_38 (Conv2D) (None, 30, 30, 32) 896 ____________________________________________________________________________________________________ batch_normalization_19 (BatchNormalization) (None, 30, 30, 32) 128 ____________________________________________________________________________________________________ activation_12 (Activation) (None, 30, 30, 32) 0 ____________________________________________________________________________________________________ max_pooling2d_35 (MaxPooling2D) (None, 15, 15, 32) 0 ____________________________________________________________________________________________________ conv2d_39 (Conv2D) (None, 13, 13, 64) 18496 ____________________________________________________________________________________________________ batch_normalization_20 (BatchNormalization) (None, 13, 13, 64) 256 ____________________________________________________________________________________________________ activation_13 (Activation) (None, 13, 13, 64) 0 ____________________________________________________________________________________________________ max_pooling2d_36 (MaxPooling2D) (None, 6, 6, 64) 0 ____________________________________________________________________________________________________ conv2d_40 (Conv2D) (None, 4, 4, 64) 36928 ____________________________________________________________________________________________________ batch_normalization_21 (BatchNormalization) (None, 4, 4, 64) 256 ____________________________________________________________________________________________________ activation_14 (Activation) (None, 4, 4, 64) 0 
____________________________________________________________________________________________________ max_pooling2d_37 (MaxPooling2D) (None, 2, 2, 64) 0 ____________________________________________________________________________________________________ flatten_13 (Flatten) (None, 256) 0 ____________________________________________________________________________________________________ dense_31 (Dense) (None, 64) 16448 ____________________________________________________________________________________________________ dense_32 (Dense) (None, 10) 650 ==================================================================================================== Total params: 74,058 Trainable params: 73,738 Non-trainable params: 320 ____________________________________________________________________________________________________
# Train for 15 epochs; the last 20% of the training set is held out for validation.
history = model.fit(X_train, y_train_c, batch_size=128, epochs=15, verbose=1, validation_split=0.2)
Epoch 1/15 313/313 [==============================] - 2s 4ms/step - loss: 1.6406 - accuracy: 0.4054 - val_loss: 1.6940 - val_accuracy: 0.4527 Epoch 2/15 313/313 [==============================] - 1s 3ms/step - loss: 1.0853 - accuracy: 0.6179 - val_loss: 1.1967 - val_accuracy: 0.5889 Epoch 3/15 313/313 [==============================] - 1s 4ms/step - loss: 0.9054 - accuracy: 0.6866 - val_loss: 1.4202 - val_accuracy: 0.5322 Epoch 4/15 313/313 [==============================] - 1s 3ms/step - loss: 0.8000 - accuracy: 0.7207 - val_loss: 0.9814 - val_accuracy: 0.6544 Epoch 5/15 313/313 [==============================] - 1s 4ms/step - loss: 0.7222 - accuracy: 0.7493 - val_loss: 0.8527 - val_accuracy: 0.7049 Epoch 6/15 313/313 [==============================] - 1s 3ms/step - loss: 0.6466 - accuracy: 0.7747 - val_loss: 0.8420 - val_accuracy: 0.7078 Epoch 7/15 313/313 [==============================] - 1s 3ms/step - loss: 0.5931 - accuracy: 0.7970 - val_loss: 0.8680 - val_accuracy: 0.7050 Epoch 8/15 313/313 [==============================] - 1s 3ms/step - loss: 0.5406 - accuracy: 0.8135 - val_loss: 0.8884 - val_accuracy: 0.7056 Epoch 9/15 313/313 [==============================] - 1s 3ms/step - loss: 0.5057 - accuracy: 0.8275 - val_loss: 1.0175 - val_accuracy: 0.6615 Epoch 10/15 313/313 [==============================] - 1s 3ms/step - loss: 0.4724 - accuracy: 0.8367 - val_loss: 0.8811 - val_accuracy: 0.7098 Epoch 11/15 313/313 [==============================] - 1s 4ms/step - loss: 0.4400 - accuracy: 0.8485 - val_loss: 0.9070 - val_accuracy: 0.7067 Epoch 12/15 313/313 [==============================] - 1s 3ms/step - loss: 0.4023 - accuracy: 0.8628 - val_loss: 0.9552 - val_accuracy: 0.6972 Epoch 13/15 313/313 [==============================] - 1s 3ms/step - loss: 0.3682 - accuracy: 0.8738 - val_loss: 0.9099 - val_accuracy: 0.7162 Epoch 14/15 313/313 [==============================] - 1s 4ms/step - loss: 0.3433 - accuracy: 0.8835 - val_loss: 1.0859 - val_accuracy: 0.6728 Epoch 
15/15 313/313 [==============================] - 1s 3ms/step - loss: 0.3202 - accuracy: 0.8922 - val_loss: 1.0137 - val_accuracy: 0.7045
# Evaluate on the held-out test set (verbose=0 suppresses the progress bar),
# then print each metric next to its name. zip pairs metric names with their
# values directly, avoiding the index-based range(len(...)) loop.
score = model.evaluate(X_test, y_test_c, batch_size=128, verbose=0)
for name, value in zip(model.metrics_names, score):
    print(f"Test {name} = {value:.3f}")
Test loss = 1.025 Test accuracy = 0.701
# Produce the evaluation figures (helper defined earlier in the notebook) for the report.
PlotModelEval(model, history, X_test, y_test, cifar_labels)
Train the model and save the evaluation image for the report.
When using BatchNorm one must take care to select a good minibatch size. Describe what problems might arise if the wrong minibatch size is used.
You can reason about this given the description of BatchNorm above, or you can search for the information in other sources. Do not forget to provide links to the sources if you do!
We now want you to create your own model based on what you have learned. We want you to experiment and see what works and what doesn't, so don't go crazy with the number of epochs until you think you have something that works.
To pass this assignment, we want you to achieve 75% accuracy on the test data in no more than 25 epochs. This is possible using the layers and techniques we have explored in this notebook, but you are free to use any other methods that we didn't cover. (You are obviously not allowed to cheat, for example by training on the test data.)
from tensorflow.keras.utils import plot_model

x_in = Input(shape=X_train.shape[1:])
# === Add your code here ===
# Feature extractor: two 32-filter conv blocks, pooling and dropout, then
# 64/64/128-filter conv blocks; every convolution is followed by batch
# normalization before its activation.
x = Conv2D(32, kernel_size=(3, 3))(x_in)
x = BatchNormalization(momentum=0.99)(x)
x = Activation("relu")(x)
x = Conv2D(32, kernel_size=(3, 3))(x)
x = BatchNormalization(momentum=0.99)(x)
x = Activation("relu")(x)
x = MaxPooling2D((2, 2))(x)
x = Dropout(0.25)(x)
x = Conv2D(64, kernel_size=(3, 3))(x)
x = BatchNormalization(momentum=0.99)(x)
x = Activation("relu")(x)
x = Conv2D(64, kernel_size=(3, 3))(x)
x = BatchNormalization(momentum=0.99)(x)
x = Activation("relu")(x)
x = Conv2D(128, kernel_size=(3, 3))(x)
x = BatchNormalization(momentum=0.99)(x)
x = Activation("relu")(x)
x = MaxPooling2D((2, 2))(x)
x = Dropout(0.45)(x)
# Classifier head: two swish Dense layers, batch-normalized, then softmax.
x = Flatten()(x)
x = Dense(512, activation="swish")(x)
x = BatchNormalization(momentum=0.99)(x)
x = Dense(256, activation="swish")(x)
x = BatchNormalization(momentum=0.99)(x)
x = Dense(10, activation="softmax")(x)
# ==========================
model = Model(inputs=x_in, outputs=x)
# `lr` is deprecated in tf.keras optimizers; `learning_rate` is the supported name.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=sgd)
model.summary(100)
plot_model(model, show_shapes=True, show_layer_names=False)
Model: "model_18" ____________________________________________________________________________________________________ Layer (type) Output Shape Param # ==================================================================================================== input_22 (InputLayer) [(None, 32, 32, 3)] 0 ____________________________________________________________________________________________________ conv2d_72 (Conv2D) (None, 30, 30, 32) 896 ____________________________________________________________________________________________________ batch_normalization_57 (BatchNormalization) (None, 30, 30, 32) 128 ____________________________________________________________________________________________________ activation_26 (Activation) (None, 30, 30, 32) 0 ____________________________________________________________________________________________________ conv2d_73 (Conv2D) (None, 28, 28, 32) 9248 ____________________________________________________________________________________________________ batch_normalization_58 (BatchNormalization) (None, 28, 28, 32) 128 ____________________________________________________________________________________________________ activation_27 (Activation) (None, 28, 28, 32) 0 ____________________________________________________________________________________________________ max_pooling2d_51 (MaxPooling2D) (None, 14, 14, 32) 0 ____________________________________________________________________________________________________ dropout_35 (Dropout) (None, 14, 14, 32) 0 ____________________________________________________________________________________________________ conv2d_74 (Conv2D) (None, 12, 12, 64) 18496 ____________________________________________________________________________________________________ batch_normalization_59 (BatchNormalization) (None, 12, 12, 64) 256 ____________________________________________________________________________________________________ activation_28 (Activation) (None, 12, 12, 64) 0 
____________________________________________________________________________________________________ conv2d_75 (Conv2D) (None, 10, 10, 64) 36928 ____________________________________________________________________________________________________ batch_normalization_60 (BatchNormalization) (None, 10, 10, 64) 256 ____________________________________________________________________________________________________ activation_29 (Activation) (None, 10, 10, 64) 0 ____________________________________________________________________________________________________ conv2d_76 (Conv2D) (None, 8, 8, 128) 73856 ____________________________________________________________________________________________________ batch_normalization_61 (BatchNormalization) (None, 8, 8, 128) 512 ____________________________________________________________________________________________________ activation_30 (Activation) (None, 8, 8, 128) 0 ____________________________________________________________________________________________________ max_pooling2d_52 (MaxPooling2D) (None, 4, 4, 128) 0 ____________________________________________________________________________________________________ dropout_36 (Dropout) (None, 4, 4, 128) 0 ____________________________________________________________________________________________________ flatten_16 (Flatten) (None, 2048) 0 ____________________________________________________________________________________________________ dense_45 (Dense) (None, 512) 1049088 ____________________________________________________________________________________________________ batch_normalization_62 (BatchNormalization) (None, 512) 2048 ____________________________________________________________________________________________________ dense_46 (Dense) (None, 256) 131328 ____________________________________________________________________________________________________ batch_normalization_63 (BatchNormalization) (None, 256) 1024 
____________________________________________________________________________________________________ dense_47 (Dense) (None, 10) 2570 ==================================================================================================== Total params: 1,326,762 Trainable params: 1,324,586 Non-trainable params: 2,176 ____________________________________________________________________________________________________
# Train for 25 epochs (the assignment limit); 20% of the training set is held out for validation.
history = model.fit(X_train, y_train_c, batch_size=256, epochs=25, verbose=1, validation_split=0.2)
Epoch 1/25 157/157 [==============================] - 3s 14ms/step - loss: 2.0663 - accuracy: 0.2986 - val_loss: 2.9033 - val_accuracy: 0.1096 Epoch 2/25 157/157 [==============================] - 2s 13ms/step - loss: 1.3303 - accuracy: 0.5175 - val_loss: 2.3145 - val_accuracy: 0.2160 Epoch 3/25 157/157 [==============================] - 2s 13ms/step - loss: 1.1536 - accuracy: 0.5862 - val_loss: 1.3041 - val_accuracy: 0.5179 Epoch 4/25 157/157 [==============================] - 2s 13ms/step - loss: 1.0374 - accuracy: 0.6291 - val_loss: 1.4633 - val_accuracy: 0.5032 Epoch 5/25 157/157 [==============================] - 2s 13ms/step - loss: 0.9655 - accuracy: 0.6549 - val_loss: 1.0543 - val_accuracy: 0.6303 Epoch 6/25 157/157 [==============================] - 2s 13ms/step - loss: 0.8974 - accuracy: 0.6802 - val_loss: 0.9883 - val_accuracy: 0.6521 Epoch 7/25 157/157 [==============================] - 2s 13ms/step - loss: 0.8446 - accuracy: 0.6988 - val_loss: 0.9170 - val_accuracy: 0.6878 Epoch 8/25 157/157 [==============================] - 2s 13ms/step - loss: 0.7977 - accuracy: 0.7158 - val_loss: 0.9187 - val_accuracy: 0.6781 Epoch 9/25 157/157 [==============================] - 2s 13ms/step - loss: 0.7522 - accuracy: 0.7311 - val_loss: 0.8215 - val_accuracy: 0.7139 Epoch 10/25 157/157 [==============================] - 2s 13ms/step - loss: 0.7290 - accuracy: 0.7411 - val_loss: 0.7356 - val_accuracy: 0.7456 Epoch 11/25 157/157 [==============================] - 2s 13ms/step - loss: 0.6838 - accuracy: 0.7593 - val_loss: 0.7937 - val_accuracy: 0.7256 Epoch 12/25 157/157 [==============================] - 2s 13ms/step - loss: 0.6639 - accuracy: 0.7634 - val_loss: 1.1348 - val_accuracy: 0.6472 Epoch 13/25 157/157 [==============================] - 2s 13ms/step - loss: 0.6444 - accuracy: 0.7687 - val_loss: 0.8130 - val_accuracy: 0.7193 Epoch 14/25 157/157 [==============================] - 2s 13ms/step - loss: 0.6242 - accuracy: 0.7773 - val_loss: 0.8279 - val_accuracy: 
0.7205 Epoch 15/25 157/157 [==============================] - 2s 13ms/step - loss: 0.5902 - accuracy: 0.7891 - val_loss: 0.7101 - val_accuracy: 0.7566 Epoch 16/25 157/157 [==============================] - 2s 13ms/step - loss: 0.5698 - accuracy: 0.7938 - val_loss: 0.6917 - val_accuracy: 0.7614 Epoch 17/25 157/157 [==============================] - 2s 13ms/step - loss: 0.5597 - accuracy: 0.7983 - val_loss: 0.7156 - val_accuracy: 0.7596 Epoch 18/25 157/157 [==============================] - 2s 13ms/step - loss: 0.5378 - accuracy: 0.8091 - val_loss: 0.7015 - val_accuracy: 0.7614 Epoch 19/25 157/157 [==============================] - 2s 13ms/step - loss: 0.5137 - accuracy: 0.8177 - val_loss: 0.7991 - val_accuracy: 0.7372 Epoch 20/25 157/157 [==============================] - 2s 13ms/step - loss: 0.5045 - accuracy: 0.8186 - val_loss: 0.7371 - val_accuracy: 0.7552 Epoch 21/25 157/157 [==============================] - 2s 13ms/step - loss: 0.4836 - accuracy: 0.8288 - val_loss: 0.6200 - val_accuracy: 0.7901 Epoch 22/25 157/157 [==============================] - 2s 13ms/step - loss: 0.4822 - accuracy: 0.8296 - val_loss: 0.6694 - val_accuracy: 0.7719 Epoch 23/25 157/157 [==============================] - 2s 13ms/step - loss: 0.4545 - accuracy: 0.8384 - val_loss: 0.8302 - val_accuracy: 0.7302 Epoch 24/25 157/157 [==============================] - 2s 13ms/step - loss: 0.4336 - accuracy: 0.8425 - val_loss: 0.6779 - val_accuracy: 0.7781 Epoch 25/25 157/157 [==============================] - 2s 13ms/step - loss: 0.4344 - accuracy: 0.8458 - val_loss: 0.6890 - val_accuracy: 0.7714
# Evaluate on the held-out test set (verbose=0 suppresses the progress bar),
# then print each metric next to its name. zip pairs metric names with their
# values directly, avoiding the index-based range(len(...)) loop.
score = model.evaluate(X_test, y_test_c, batch_size=128, verbose=0)
for name, value in zip(model.metrics_names, score):
    print(f"Test {name} = {value:.3f}")
Test loss = 0.709 Test accuracy = 0.768
# Produce the evaluation figures (helper defined earlier in the notebook) for the report.
PlotModelEval(model, history, X_test, y_test, cifar_labels)
Design and train a model that achieves at least 75% test accuracy in at most 25 epochs. Save the evaluation image for the report. Also, in the report you should explain your model and motivate the design choices you have made.
For those of you that feel the competitive spark right now, we will hold an optional competition where you can submit your trained model-file for evaluation. To make this fair, you are not allowed to train for more than 50 epochs, but other than that we want you to get creative. The competition is simple, we will evaluate all submitted models and the model with highest test accuracy wins. The prize is nothing less than eternal glory.
Here are some things to look into, but note that we don't have the answers here. Any of these might improve the performance, or might not, or it might only work in combination with each other. This is up to you to figure out. This is how deep learning research often happens, trying things in a smart way to see what works best.
Write your competition model here. This way you can try different things without deleting the model you created above. Also set the GroupName variable to your LiU IDs or some unique name; that way our scripts can be a lot easier, thanks and good luck :)
from tensorflow.keras.layers import Add, AveragePooling2D, ZeroPadding2D, MaxPool2D, concatenate, LocallyConnected2D
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.initializers import GlorotNormal
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.activations import swish
# Xavier/Glorot normal initializer, shared by all convolution layers below.
xav_initializer = GlorotNormal()
## Data augmentation: random 10% shifts and horizontal flips, with 5% of the
## training data reserved as a validation subset.
# NOTE(review): the "validation" subset is drawn from the same augmenting
# generator, so validation images are augmented too — verify this is intended.
aug = ImageDataGenerator(width_shift_range = 0.1, height_shift_range = 0.1, horizontal_flip = True, fill_mode = "nearest", validation_split = 0.05)
aug_train = aug.flow(X_train, y_train_c, batch_size = 64, subset = "training")
aug_vali = aug.flow(X_train, y_train_c, batch_size = 64, subset = "validation")
def convolutional_block(x, n_filters ,filter_sizeX, filter_sizeY, stride, axises, padding = "same"):
    """Basic convolutional block: a Xavier-initialized 2D convolution, then batch
    normalization along `axises`, followed by a swish activation."""
    x = Conv2D(n_filters,(filter_sizeX, filter_sizeY), strides=stride, padding=padding, kernel_initializer = xav_initializer)(x)
    x = BatchNormalization(axis = axises)(x)
    x = Activation("swish")(x)
    return x
def inception_block(x, conv1_size, conv2_size, conv5_size, n_pooling, axises):
    """Inception-style block: apply 1x1, 3x3 and 5x5 convolutional blocks plus a
    max-pool branch reduced by a 1x1 convolution, all on the same input, and
    concatenate the four branch outputs along `axises`."""
    branch_1x1 = convolutional_block(x, conv1_size, 1, 1, (1, 1), axises)
    branch_3x3 = convolutional_block(x, conv2_size, 3, 3, (1, 1), axises)
    branch_5x5 = convolutional_block(x, conv5_size, 5, 5, (1, 1), axises)
    branch_pool = MaxPooling2D((3, 3), strides=(1, 1), padding = "same")(x)
    branch_pool = Conv2D(n_pooling, (1, 1), padding = "same", activation = "swish", kernel_initializer = xav_initializer)(branch_pool)
    return concatenate([branch_1x1, branch_3x3, branch_5x5, branch_pool], axis=axises)
def downsample_block(x, n_filters, axises):
    """Downsample the input two ways -- a stride-2 valid-padded 3x3 convolutional
    block and a stride-2 3x3 max-pool -- and concatenate the two results."""
    strided_conv = convolutional_block(x, n_filters, 3, 3, (2, 2), axises, padding = "valid")
    strided_pool = MaxPooling2D((3, 3), strides=(2, 2))(x)
    return concatenate([strided_conv, strided_pool], axis = axises)
# Training-schedule constants read by the learning-rate schedule below.
n_epochs = 50
init_lr = 5e-3

def polynomial_decay(epoch, *, power=1.0):
    """Polynomial learning-rate decay schedule for LearningRateScheduler.

    Returns ``init_lr * (1 - epoch / n_epochs) ** power``. With the default
    ``power=1.0`` this is plain linear decay from init_lr at epoch 0 down to
    0 at epoch n_epochs (identical to the original implementation).

    ``power`` is keyword-only on purpose: Keras' LearningRateScheduler may
    probe the schedule as ``schedule(epoch, lr)``, and a second positional
    parameter would silently receive the current learning rate.
    """
    return init_lr * (1.0 - epoch / float(n_epochs)) ** power
# Unique group identifier used as the model name for the competition scripts.
GroupName = "AI4Good"

x_in = Input(shape=X_train.shape[1:])
# Stem: one convolutional block followed by two inception blocks.
x = convolutional_block(x_in, n_filters = 96, filter_sizeX = 3, filter_sizeY = 3, stride = (1, 1), axises = -1)
x = inception_block(x, conv1_size = 32, conv2_size = 32, conv5_size = 32, n_pooling = 32, axises = -1)
x_split1 = inception_block(x, conv1_size = 32, conv2_size = 48, conv5_size = 48, n_pooling = 32, axises = -1)
# Split into three parallel branches, each downsampling the same input.
x = downsample_block(x_split1, n_filters = 80, axises = -1)
x1 = downsample_block(x_split1, n_filters = 80, axises = -1)
x2 = downsample_block(x_split1, n_filters = 80, axises = -1)
# Two inception blocks per branch.
x = inception_block(x, conv1_size = 112, conv2_size = 48, conv5_size = 32, n_pooling = 48, axises = -1)
x = inception_block(x, conv1_size = 96, conv2_size = 64, conv5_size = 32, n_pooling = 32, axises = -1) # Output (15,15, 224)
x1 = inception_block(x1, conv1_size = 112, conv2_size = 48, conv5_size = 32, n_pooling = 48, axises = -1)
x1 = inception_block(x1, conv1_size = 96, conv2_size = 64, conv5_size = 32, n_pooling = 32, axises = -1) # Output (15,15,224)
x2 = inception_block(x2, conv1_size = 112, conv2_size = 48, conv5_size = 32, n_pooling = 48, axises = -1)
x2 = inception_block(x2, conv1_size = 96, conv2_size = 64, conv5_size = 32, n_pooling = 32, axises = -1) # Output (15,15,224)
# Merge the branches pairwise (x with x1, x1 with x2), then merge the pairs.
x_partial1 = concatenate([x,x1], axis = -1)
x_partial1 = convolutional_block(x_partial1, n_filters = 336, filter_sizeX = 3, filter_sizeY=3, stride = (1,1), axises = -1)
x_partial2 = concatenate([x1,x2], axis = -1)
x_partial2 = convolutional_block(x_partial2, n_filters = 336, filter_sizeX = 3, filter_sizeY=3, stride = (1,1), axises = -1)
x_split2 = concatenate([x_partial1, x_partial2], axis = -1) # Output (15,15, 672)
# 3x3 then 1x1 convolutional blocks compress the merged features.
x_split2 = convolutional_block(x_split2, n_filters = 336, filter_sizeX = 3, filter_sizeY=3, stride = (1,1), axises = -1)
x_split2 = convolutional_block(x_split2, n_filters = 224, filter_sizeX = 1, filter_sizeY=1, stride = (1,1), axises = -1)
# Second two-branch split: three inception blocks per branch, then downsample.
x = inception_block(x_split2, conv1_size = 80, conv2_size = 80, conv5_size = 32, n_pooling = 32, axises = -1)
x1 = inception_block(x_split2, conv1_size = 80, conv2_size = 80, conv5_size = 32, n_pooling = 32, axises = -1)
x = inception_block(x, conv1_size = 48, conv2_size = 96, conv5_size = 32, n_pooling = 32, axises = -1)
x = inception_block(x, conv1_size = 112, conv2_size = 48, conv5_size = 32, n_pooling = 48, axises = -1)
x1 = inception_block(x1, conv1_size = 48, conv2_size = 96, conv5_size = 32, n_pooling = 32, axises = -1)
x1 = inception_block(x1, conv1_size = 112, conv2_size = 48, conv5_size = 32, n_pooling = 48, axises = -1)
x = downsample_block(x, n_filters = 96, axises = -1)
x1 = downsample_block(x1, n_filters = 96, axises = -1)
x_split3 = concatenate([x,x1], axis = -1) # Output (7,7,372)
x_split3 = convolutional_block(x_split3, n_filters = 504, filter_sizeX = 3, filter_sizeY = 3, stride = (1,1), axises = -1)
x_split3 = convolutional_block(x_split3, n_filters = 336, filter_sizeX = 1, filter_sizeY = 1, stride = (1,1), axises = -1)
# Final two-branch split of two inception blocks each, merged before the head.
x = inception_block(x_split3, conv1_size = 176, conv2_size = 160, conv5_size = 96, n_pooling = 96, axises = -1)
x = inception_block(x, conv1_size = 176, conv2_size = 160, conv5_size = 96, n_pooling = 96, axises = -1)
x1 = inception_block(x_split3, conv1_size = 176, conv2_size = 160, conv5_size = 96, n_pooling = 96, axises = -1)
x1 = inception_block(x1, conv1_size = 176, conv2_size = 160, conv5_size = 96, n_pooling = 96, axises = -1)
x = concatenate([x,x1], axis = -1)
# Head: global 7x7 average pool, dropout, then softmax over 10 classes.
x = AveragePooling2D((7, 7))(x)
x = Dropout(0.5)(x)
x = Flatten()(x)
x = Dense(10)(x)
x = Activation("softmax")(x)
model = Model(inputs=x_in, outputs=x, name=GroupName)
# Linear learning-rate decay (see polynomial_decay above), applied per epoch.
callback = [LearningRateScheduler(polynomial_decay)]
#opt = Adam(lr=init_lr, beta_1=0.9, beta_2=0.999, epsilon=1e-07)
opt = SGD(lr = init_lr, momentum = 0.9)
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=opt)
# Print the summary and model image
2022-02-24 10:24:27.475021: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 2022-02-24 10:24:27.475693: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-24 10:24:27.475912: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2080 SUPER computeCapability: 7.5 coreClock: 1.815GHz coreCount: 48 deviceMemorySize: 7.77GiB deviceMemoryBandwidth: 462.00GiB/s 2022-02-24 10:24:27.475932: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1 2022-02-24 10:24:27.475948: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10 2022-02-24 10:24:27.475957: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10 2022-02-24 10:24:27.475964: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10 2022-02-24 10:24:27.475971: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10 2022-02-24 10:24:27.475979: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.10 2022-02-24 10:24:27.475986: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.10 2022-02-24 10:24:27.475993: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened 
dynamic library libcudnn.so.7 2022-02-24 10:24:27.476030: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-24 10:24:27.476237: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-24 10:24:27.476419: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0 2022-02-24 10:24:27.476645: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1 2022-02-24 10:24:28.064281: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1261] Device interconnect StreamExecutor with strength 1 edge matrix: 2022-02-24 10:24:28.064298: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1267] 0 2022-02-24 10:24:28.064302: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1280] 0: N 2022-02-24 10:24:28.064600: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-24 10:24:28.064828: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-24 10:24:28.065028: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2022-02-24 10:24:28.065215: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1406] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6677 MB memory) -> physical GPU (device: 0, name: NVIDIA GeForce RTX 2080 SUPER, pci bus id: 0000:01:00.0, compute 
capability: 7.5) 2022-02-24 10:24:28.066068: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
# Print the layer summary and render the model graph with output shapes.
model.summary(100)
plot_model(model, show_shapes=True, show_layer_names=False)
Model: "AI4Good"
____________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
====================================================================================================
input_1 (InputLayer) [(None, 32, 32, 3)] 0
____________________________________________________________________________________________________
conv2d (Conv2D) (None, 32, 32, 96) 2688 input_1[0][0]
____________________________________________________________________________________________________
batch_normalization (BatchNormal (None, 32, 32, 96) 384 conv2d[0][0]
____________________________________________________________________________________________________
activation (Activation) (None, 32, 32, 96) 0 batch_normalization[0][0]
____________________________________________________________________________________________________
conv2d_1 (Conv2D) (None, 32, 32, 32) 3104 activation[0][0]
____________________________________________________________________________________________________
conv2d_2 (Conv2D) (None, 32, 32, 32) 27680 activation[0][0]
____________________________________________________________________________________________________
conv2d_3 (Conv2D) (None, 32, 32, 32) 76832 activation[0][0]
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, 32, 32, 32) 128 conv2d_1[0][0]
____________________________________________________________________________________________________
batch_normalization_2 (BatchNorm (None, 32, 32, 32) 128 conv2d_2[0][0]
____________________________________________________________________________________________________
batch_normalization_3 (BatchNorm (None, 32, 32, 32) 128 conv2d_3[0][0]
____________________________________________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 32, 32, 96) 0 activation[0][0]
____________________________________________________________________________________________________
activation_1 (Activation) (None, 32, 32, 32) 0 batch_normalization_1[0][0]
____________________________________________________________________________________________________
activation_2 (Activation) (None, 32, 32, 32) 0 batch_normalization_2[0][0]
____________________________________________________________________________________________________
activation_3 (Activation) (None, 32, 32, 32) 0 batch_normalization_3[0][0]
____________________________________________________________________________________________________
conv2d_4 (Conv2D) (None, 32, 32, 32) 3104 max_pooling2d[0][0]
____________________________________________________________________________________________________
concatenate (Concatenate) (None, 32, 32, 128) 0 activation_1[0][0]
activation_2[0][0]
activation_3[0][0]
conv2d_4[0][0]
____________________________________________________________________________________________________
conv2d_5 (Conv2D) (None, 32, 32, 32) 4128 concatenate[0][0]
____________________________________________________________________________________________________
conv2d_6 (Conv2D) (None, 32, 32, 48) 55344 concatenate[0][0]
____________________________________________________________________________________________________
conv2d_7 (Conv2D) (None, 32, 32, 48) 153648 concatenate[0][0]
____________________________________________________________________________________________________
batch_normalization_4 (BatchNorm (None, 32, 32, 32) 128 conv2d_5[0][0]
____________________________________________________________________________________________________
batch_normalization_5 (BatchNorm (None, 32, 32, 48) 192 conv2d_6[0][0]
____________________________________________________________________________________________________
batch_normalization_6 (BatchNorm (None, 32, 32, 48) 192 conv2d_7[0][0]
____________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D) (None, 32, 32, 128) 0 concatenate[0][0]
____________________________________________________________________________________________________
activation_4 (Activation) (None, 32, 32, 32) 0 batch_normalization_4[0][0]
____________________________________________________________________________________________________
activation_5 (Activation) (None, 32, 32, 48) 0 batch_normalization_5[0][0]
____________________________________________________________________________________________________
activation_6 (Activation) (None, 32, 32, 48) 0 batch_normalization_6[0][0]
____________________________________________________________________________________________________
conv2d_8 (Conv2D) (None, 32, 32, 32) 4128 max_pooling2d_1[0][0]
____________________________________________________________________________________________________
concatenate_1 (Concatenate) (None, 32, 32, 160) 0 activation_4[0][0]
activation_5[0][0]
activation_6[0][0]
conv2d_8[0][0]
____________________________________________________________________________________________________
conv2d_9 (Conv2D) (None, 15, 15, 80) 115280 concatenate_1[0][0]
____________________________________________________________________________________________________
conv2d_10 (Conv2D) (None, 15, 15, 80) 115280 concatenate_1[0][0]
____________________________________________________________________________________________________
conv2d_11 (Conv2D) (None, 15, 15, 80) 115280 concatenate_1[0][0]
____________________________________________________________________________________________________
batch_normalization_7 (BatchNorm (None, 15, 15, 80) 320 conv2d_9[0][0]
____________________________________________________________________________________________________
batch_normalization_8 (BatchNorm (None, 15, 15, 80) 320 conv2d_10[0][0]
____________________________________________________________________________________________________
batch_normalization_9 (BatchNorm (None, 15, 15, 80) 320 conv2d_11[0][0]
____________________________________________________________________________________________________
activation_7 (Activation) (None, 15, 15, 80) 0 batch_normalization_7[0][0]
____________________________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D) (None, 15, 15, 160) 0 concatenate_1[0][0]
____________________________________________________________________________________________________
activation_8 (Activation) (None, 15, 15, 80) 0 batch_normalization_8[0][0]
____________________________________________________________________________________________________
max_pooling2d_3 (MaxPooling2D) (None, 15, 15, 160) 0 concatenate_1[0][0]
____________________________________________________________________________________________________
activation_9 (Activation) (None, 15, 15, 80) 0 batch_normalization_9[0][0]
____________________________________________________________________________________________________
max_pooling2d_4 (MaxPooling2D) (None, 15, 15, 160) 0 concatenate_1[0][0]
____________________________________________________________________________________________________
concatenate_2 (Concatenate) (None, 15, 15, 240) 0 activation_7[0][0]
max_pooling2d_2[0][0]
____________________________________________________________________________________________________
concatenate_3 (Concatenate) (None, 15, 15, 240) 0 activation_8[0][0]
max_pooling2d_3[0][0]
____________________________________________________________________________________________________
concatenate_4 (Concatenate) (None, 15, 15, 240) 0 activation_9[0][0]
max_pooling2d_4[0][0]
____________________________________________________________________________________________________
conv2d_12 (Conv2D) (None, 15, 15, 112) 26992 concatenate_2[0][0]
____________________________________________________________________________________________________
conv2d_13 (Conv2D) (None, 15, 15, 48) 103728 concatenate_2[0][0]
____________________________________________________________________________________________________
conv2d_14 (Conv2D) (None, 15, 15, 32) 192032 concatenate_2[0][0]
____________________________________________________________________________________________________
conv2d_20 (Conv2D) (None, 15, 15, 112) 26992 concatenate_3[0][0]
____________________________________________________________________________________________________
conv2d_21 (Conv2D) (None, 15, 15, 48) 103728 concatenate_3[0][0]
____________________________________________________________________________________________________
conv2d_22 (Conv2D) (None, 15, 15, 32) 192032 concatenate_3[0][0]
____________________________________________________________________________________________________
conv2d_28 (Conv2D) (None, 15, 15, 112) 26992 concatenate_4[0][0]
____________________________________________________________________________________________________
conv2d_29 (Conv2D) (None, 15, 15, 48) 103728 concatenate_4[0][0]
____________________________________________________________________________________________________
conv2d_30 (Conv2D) (None, 15, 15, 32) 192032 concatenate_4[0][0]
____________________________________________________________________________________________________
batch_normalization_10 (BatchNor (None, 15, 15, 112) 448 conv2d_12[0][0]
____________________________________________________________________________________________________
batch_normalization_11 (BatchNor (None, 15, 15, 48) 192 conv2d_13[0][0]
____________________________________________________________________________________________________
batch_normalization_12 (BatchNor (None, 15, 15, 32) 128 conv2d_14[0][0]
____________________________________________________________________________________________________
max_pooling2d_5 (MaxPooling2D) (None, 15, 15, 240) 0 concatenate_2[0][0]
____________________________________________________________________________________________________
batch_normalization_16 (BatchNor (None, 15, 15, 112) 448 conv2d_20[0][0]
____________________________________________________________________________________________________
batch_normalization_17 (BatchNor (None, 15, 15, 48) 192 conv2d_21[0][0]
____________________________________________________________________________________________________
batch_normalization_18 (BatchNor (None, 15, 15, 32) 128 conv2d_22[0][0]
____________________________________________________________________________________________________
max_pooling2d_7 (MaxPooling2D) (None, 15, 15, 240) 0 concatenate_3[0][0]
____________________________________________________________________________________________________
batch_normalization_22 (BatchNor (None, 15, 15, 112) 448 conv2d_28[0][0]
____________________________________________________________________________________________________
batch_normalization_23 (BatchNor (None, 15, 15, 48) 192 conv2d_29[0][0]
____________________________________________________________________________________________________
batch_normalization_24 (BatchNor (None, 15, 15, 32) 128 conv2d_30[0][0]
____________________________________________________________________________________________________
max_pooling2d_9 (MaxPooling2D) (None, 15, 15, 240) 0 concatenate_4[0][0]
____________________________________________________________________________________________________
activation_10 (Activation) (None, 15, 15, 112) 0 batch_normalization_10[0][0]
____________________________________________________________________________________________________
activation_11 (Activation) (None, 15, 15, 48) 0 batch_normalization_11[0][0]
____________________________________________________________________________________________________
activation_12 (Activation) (None, 15, 15, 32) 0 batch_normalization_12[0][0]
____________________________________________________________________________________________________
conv2d_15 (Conv2D) (None, 15, 15, 48) 11568 max_pooling2d_5[0][0]
____________________________________________________________________________________________________
activation_16 (Activation) (None, 15, 15, 112) 0 batch_normalization_16[0][0]
____________________________________________________________________________________________________
activation_17 (Activation) (None, 15, 15, 48) 0 batch_normalization_17[0][0]
____________________________________________________________________________________________________
activation_18 (Activation) (None, 15, 15, 32) 0 batch_normalization_18[0][0]
____________________________________________________________________________________________________
conv2d_23 (Conv2D) (None, 15, 15, 48) 11568 max_pooling2d_7[0][0]
____________________________________________________________________________________________________
activation_22 (Activation) (None, 15, 15, 112) 0 batch_normalization_22[0][0]
____________________________________________________________________________________________________
activation_23 (Activation) (None, 15, 15, 48) 0 batch_normalization_23[0][0]
____________________________________________________________________________________________________
activation_24 (Activation) (None, 15, 15, 32) 0 batch_normalization_24[0][0]
____________________________________________________________________________________________________
conv2d_31 (Conv2D) (None, 15, 15, 48) 11568 max_pooling2d_9[0][0]
____________________________________________________________________________________________________
concatenate_5 (Concatenate) (None, 15, 15, 240) 0 activation_10[0][0]
activation_11[0][0]
activation_12[0][0]
conv2d_15[0][0]
____________________________________________________________________________________________________
concatenate_7 (Concatenate) (None, 15, 15, 240) 0 activation_16[0][0]
activation_17[0][0]
activation_18[0][0]
conv2d_23[0][0]
____________________________________________________________________________________________________
concatenate_9 (Concatenate) (None, 15, 15, 240) 0 activation_22[0][0]
activation_23[0][0]
activation_24[0][0]
conv2d_31[0][0]
____________________________________________________________________________________________________
conv2d_16 (Conv2D) (None, 15, 15, 96) 23136 concatenate_5[0][0]
____________________________________________________________________________________________________
conv2d_17 (Conv2D) (None, 15, 15, 64) 138304 concatenate_5[0][0]
____________________________________________________________________________________________________
conv2d_18 (Conv2D) (None, 15, 15, 32) 192032 concatenate_5[0][0]
____________________________________________________________________________________________________
conv2d_24 (Conv2D) (None, 15, 15, 96) 23136 concatenate_7[0][0]
____________________________________________________________________________________________________
conv2d_25 (Conv2D) (None, 15, 15, 64) 138304 concatenate_7[0][0]
____________________________________________________________________________________________________
conv2d_26 (Conv2D) (None, 15, 15, 32) 192032 concatenate_7[0][0]
____________________________________________________________________________________________________
conv2d_32 (Conv2D) (None, 15, 15, 96) 23136 concatenate_9[0][0]
____________________________________________________________________________________________________
conv2d_33 (Conv2D) (None, 15, 15, 64) 138304 concatenate_9[0][0]
____________________________________________________________________________________________________
conv2d_34 (Conv2D) (None, 15, 15, 32) 192032 concatenate_9[0][0]
____________________________________________________________________________________________________
batch_normalization_13 (BatchNor (None, 15, 15, 96) 384 conv2d_16[0][0]
____________________________________________________________________________________________________
batch_normalization_14 (BatchNor (None, 15, 15, 64) 256 conv2d_17[0][0]
____________________________________________________________________________________________________
batch_normalization_15 (BatchNor (None, 15, 15, 32) 128 conv2d_18[0][0]
____________________________________________________________________________________________________
max_pooling2d_6 (MaxPooling2D) (None, 15, 15, 240) 0 concatenate_5[0][0]
____________________________________________________________________________________________________
batch_normalization_19 (BatchNor (None, 15, 15, 96) 384 conv2d_24[0][0]
____________________________________________________________________________________________________
batch_normalization_20 (BatchNor (None, 15, 15, 64) 256 conv2d_25[0][0]
____________________________________________________________________________________________________
batch_normalization_21 (BatchNor (None, 15, 15, 32) 128 conv2d_26[0][0]
____________________________________________________________________________________________________
max_pooling2d_8 (MaxPooling2D) (None, 15, 15, 240) 0 concatenate_7[0][0]
____________________________________________________________________________________________________
batch_normalization_25 (BatchNor (None, 15, 15, 96) 384 conv2d_32[0][0]
____________________________________________________________________________________________________
batch_normalization_26 (BatchNor (None, 15, 15, 64) 256 conv2d_33[0][0]
____________________________________________________________________________________________________
batch_normalization_27 (BatchNor (None, 15, 15, 32) 128 conv2d_34[0][0]
____________________________________________________________________________________________________
max_pooling2d_10 (MaxPooling2D) (None, 15, 15, 240) 0 concatenate_9[0][0]
____________________________________________________________________________________________________
activation_13 (Activation) (None, 15, 15, 96) 0 batch_normalization_13[0][0]
____________________________________________________________________________________________________
activation_14 (Activation) (None, 15, 15, 64) 0 batch_normalization_14[0][0]
____________________________________________________________________________________________________
activation_15 (Activation) (None, 15, 15, 32) 0 batch_normalization_15[0][0]
____________________________________________________________________________________________________
conv2d_19 (Conv2D) (None, 15, 15, 32) 7712 max_pooling2d_6[0][0]
____________________________________________________________________________________________________
activation_19 (Activation) (None, 15, 15, 96) 0 batch_normalization_19[0][0]
____________________________________________________________________________________________________
activation_20 (Activation) (None, 15, 15, 64) 0 batch_normalization_20[0][0]
____________________________________________________________________________________________________
activation_21 (Activation) (None, 15, 15, 32) 0 batch_normalization_21[0][0]
____________________________________________________________________________________________________
conv2d_27 (Conv2D) (None, 15, 15, 32) 7712 max_pooling2d_8[0][0]
____________________________________________________________________________________________________
activation_25 (Activation) (None, 15, 15, 96) 0 batch_normalization_25[0][0]
____________________________________________________________________________________________________
activation_26 (Activation) (None, 15, 15, 64) 0 batch_normalization_26[0][0]
____________________________________________________________________________________________________
activation_27 (Activation) (None, 15, 15, 32) 0 batch_normalization_27[0][0]
____________________________________________________________________________________________________
conv2d_35 (Conv2D) (None, 15, 15, 32) 7712 max_pooling2d_10[0][0]
____________________________________________________________________________________________________
concatenate_6 (Concatenate) (None, 15, 15, 224) 0 activation_13[0][0]
activation_14[0][0]
activation_15[0][0]
conv2d_19[0][0]
____________________________________________________________________________________________________
concatenate_8 (Concatenate) (None, 15, 15, 224) 0 activation_19[0][0]
activation_20[0][0]
activation_21[0][0]
conv2d_27[0][0]
____________________________________________________________________________________________________
concatenate_10 (Concatenate) (None, 15, 15, 224) 0 activation_25[0][0]
activation_26[0][0]
activation_27[0][0]
conv2d_35[0][0]
____________________________________________________________________________________________________
concatenate_11 (Concatenate) (None, 15, 15, 448) 0 concatenate_6[0][0]
concatenate_8[0][0]
____________________________________________________________________________________________________
concatenate_12 (Concatenate) (None, 15, 15, 448) 0 concatenate_8[0][0]
concatenate_10[0][0]
____________________________________________________________________________________________________
conv2d_36 (Conv2D) (None, 15, 15, 336) 1355088 concatenate_11[0][0]
____________________________________________________________________________________________________
conv2d_37 (Conv2D) (None, 15, 15, 336) 1355088 concatenate_12[0][0]
____________________________________________________________________________________________________
batch_normalization_28 (BatchNor (None, 15, 15, 336) 1344 conv2d_36[0][0]
____________________________________________________________________________________________________
batch_normalization_29 (BatchNor (None, 15, 15, 336) 1344 conv2d_37[0][0]
____________________________________________________________________________________________________
activation_28 (Activation) (None, 15, 15, 336) 0 batch_normalization_28[0][0]
____________________________________________________________________________________________________
activation_29 (Activation) (None, 15, 15, 336) 0 batch_normalization_29[0][0]
____________________________________________________________________________________________________
concatenate_13 (Concatenate) (None, 15, 15, 672) 0 activation_28[0][0]
activation_29[0][0]
____________________________________________________________________________________________________
conv2d_38 (Conv2D) (None, 15, 15, 336) 2032464 concatenate_13[0][0]
____________________________________________________________________________________________________
batch_normalization_30 (BatchNor (None, 15, 15, 336) 1344 conv2d_38[0][0]
____________________________________________________________________________________________________
activation_30 (Activation) (None, 15, 15, 336) 0 batch_normalization_30[0][0]
____________________________________________________________________________________________________
conv2d_39 (Conv2D) (None, 15, 15, 224) 75488 activation_30[0][0]
____________________________________________________________________________________________________
batch_normalization_31 (BatchNor (None, 15, 15, 224) 896 conv2d_39[0][0]
____________________________________________________________________________________________________
activation_31 (Activation) (None, 15, 15, 224) 0 batch_normalization_31[0][0]
____________________________________________________________________________________________________
conv2d_40 (Conv2D) (None, 15, 15, 80) 18000 activation_31[0][0]
____________________________________________________________________________________________________
conv2d_41 (Conv2D) (None, 15, 15, 80) 161360 activation_31[0][0]
____________________________________________________________________________________________________
conv2d_42 (Conv2D) (None, 15, 15, 32) 179232 activation_31[0][0]
____________________________________________________________________________________________________
conv2d_44 (Conv2D) (None, 15, 15, 80) 18000 activation_31[0][0]
____________________________________________________________________________________________________
conv2d_45 (Conv2D) (None, 15, 15, 80) 161360 activation_31[0][0]
____________________________________________________________________________________________________
conv2d_46 (Conv2D) (None, 15, 15, 32) 179232 activation_31[0][0]
____________________________________________________________________________________________________
batch_normalization_32 (BatchNor (None, 15, 15, 80) 320 conv2d_40[0][0]
____________________________________________________________________________________________________
batch_normalization_33 (BatchNor (None, 15, 15, 80) 320 conv2d_41[0][0]
____________________________________________________________________________________________________
batch_normalization_34 (BatchNor (None, 15, 15, 32) 128 conv2d_42[0][0]
____________________________________________________________________________________________________
max_pooling2d_11 (MaxPooling2D) (None, 15, 15, 224) 0 activation_31[0][0]
____________________________________________________________________________________________________
batch_normalization_35 (BatchNor (None, 15, 15, 80) 320 conv2d_44[0][0]
____________________________________________________________________________________________________
batch_normalization_36 (BatchNor (None, 15, 15, 80) 320 conv2d_45[0][0]
____________________________________________________________________________________________________
batch_normalization_37 (BatchNor (None, 15, 15, 32) 128 conv2d_46[0][0]
____________________________________________________________________________________________________
max_pooling2d_12 (MaxPooling2D) (None, 15, 15, 224) 0 activation_31[0][0]
____________________________________________________________________________________________________
activation_32 (Activation) (None, 15, 15, 80) 0 batch_normalization_32[0][0]
____________________________________________________________________________________________________
activation_33 (Activation) (None, 15, 15, 80) 0 batch_normalization_33[0][0]
____________________________________________________________________________________________________
activation_34 (Activation) (None, 15, 15, 32) 0 batch_normalization_34[0][0]
____________________________________________________________________________________________________
conv2d_43 (Conv2D) (None, 15, 15, 32) 7200 max_pooling2d_11[0][0]
____________________________________________________________________________________________________
activation_35 (Activation) (None, 15, 15, 80) 0 batch_normalization_35[0][0]
____________________________________________________________________________________________________
activation_36 (Activation) (None, 15, 15, 80) 0 batch_normalization_36[0][0]
____________________________________________________________________________________________________
activation_37 (Activation) (None, 15, 15, 32) 0 batch_normalization_37[0][0]
____________________________________________________________________________________________________
conv2d_47 (Conv2D) (None, 15, 15, 32) 7200 max_pooling2d_12[0][0]
____________________________________________________________________________________________________
concatenate_14 (Concatenate) (None, 15, 15, 224) 0 activation_32[0][0]
activation_33[0][0]
activation_34[0][0]
conv2d_43[0][0]
____________________________________________________________________________________________________
concatenate_15 (Concatenate) (None, 15, 15, 224) 0 activation_35[0][0]
activation_36[0][0]
activation_37[0][0]
conv2d_47[0][0]
____________________________________________________________________________________________________
conv2d_48 (Conv2D) (None, 15, 15, 48) 10800 concatenate_14[0][0]
____________________________________________________________________________________________________
conv2d_49 (Conv2D) (None, 15, 15, 96) 193632 concatenate_14[0][0]
____________________________________________________________________________________________________
conv2d_50 (Conv2D) (None, 15, 15, 32) 179232 concatenate_14[0][0]
____________________________________________________________________________________________________
conv2d_56 (Conv2D) (None, 15, 15, 48) 10800 concatenate_15[0][0]
____________________________________________________________________________________________________
conv2d_57 (Conv2D) (None, 15, 15, 96) 193632 concatenate_15[0][0]
____________________________________________________________________________________________________
conv2d_58 (Conv2D) (None, 15, 15, 32) 179232 concatenate_15[0][0]
____________________________________________________________________________________________________
batch_normalization_38 (BatchNor (None, 15, 15, 48) 192 conv2d_48[0][0]
____________________________________________________________________________________________________
batch_normalization_39 (BatchNor (None, 15, 15, 96) 384 conv2d_49[0][0]
____________________________________________________________________________________________________
batch_normalization_40 (BatchNor (None, 15, 15, 32) 128 conv2d_50[0][0]
____________________________________________________________________________________________________
max_pooling2d_13 (MaxPooling2D) (None, 15, 15, 224) 0 concatenate_14[0][0]
____________________________________________________________________________________________________
batch_normalization_44 (BatchNor (None, 15, 15, 48) 192 conv2d_56[0][0]
____________________________________________________________________________________________________
batch_normalization_45 (BatchNor (None, 15, 15, 96) 384 conv2d_57[0][0]
____________________________________________________________________________________________________
batch_normalization_46 (BatchNor (None, 15, 15, 32) 128 conv2d_58[0][0]
____________________________________________________________________________________________________
max_pooling2d_15 (MaxPooling2D) (None, 15, 15, 224) 0 concatenate_15[0][0]
____________________________________________________________________________________________________
activation_38 (Activation) (None, 15, 15, 48) 0 batch_normalization_38[0][0]
____________________________________________________________________________________________________
activation_39 (Activation) (None, 15, 15, 96) 0 batch_normalization_39[0][0]
____________________________________________________________________________________________________
activation_40 (Activation) (None, 15, 15, 32) 0 batch_normalization_40[0][0]
____________________________________________________________________________________________________
conv2d_51 (Conv2D) (None, 15, 15, 32) 7200 max_pooling2d_13[0][0]
____________________________________________________________________________________________________
activation_44 (Activation) (None, 15, 15, 48) 0 batch_normalization_44[0][0]
____________________________________________________________________________________________________
activation_45 (Activation) (None, 15, 15, 96) 0 batch_normalization_45[0][0]
____________________________________________________________________________________________________
activation_46 (Activation) (None, 15, 15, 32) 0 batch_normalization_46[0][0]
____________________________________________________________________________________________________
conv2d_59 (Conv2D) (None, 15, 15, 32) 7200 max_pooling2d_15[0][0]
____________________________________________________________________________________________________
concatenate_16 (Concatenate) (None, 15, 15, 208) 0 activation_38[0][0]
activation_39[0][0]
activation_40[0][0]
conv2d_51[0][0]
____________________________________________________________________________________________________
concatenate_18 (Concatenate) (None, 15, 15, 208) 0 activation_44[0][0]
activation_45[0][0]
activation_46[0][0]
conv2d_59[0][0]
____________________________________________________________________________________________________
conv2d_52 (Conv2D) (None, 15, 15, 112) 23408 concatenate_16[0][0]
____________________________________________________________________________________________________
conv2d_53 (Conv2D) (None, 15, 15, 48) 89904 concatenate_16[0][0]
____________________________________________________________________________________________________
conv2d_54 (Conv2D) (None, 15, 15, 32) 166432 concatenate_16[0][0]
____________________________________________________________________________________________________
conv2d_60 (Conv2D) (None, 15, 15, 112) 23408 concatenate_18[0][0]
____________________________________________________________________________________________________
conv2d_61 (Conv2D) (None, 15, 15, 48) 89904 concatenate_18[0][0]
____________________________________________________________________________________________________
conv2d_62 (Conv2D) (None, 15, 15, 32) 166432 concatenate_18[0][0]
____________________________________________________________________________________________________
batch_normalization_41 (BatchNor (None, 15, 15, 112) 448 conv2d_52[0][0]
____________________________________________________________________________________________________
batch_normalization_42 (BatchNor (None, 15, 15, 48) 192 conv2d_53[0][0]
____________________________________________________________________________________________________
batch_normalization_43 (BatchNor (None, 15, 15, 32) 128 conv2d_54[0][0]
____________________________________________________________________________________________________
max_pooling2d_14 (MaxPooling2D) (None, 15, 15, 208) 0 concatenate_16[0][0]
____________________________________________________________________________________________________
batch_normalization_47 (BatchNor (None, 15, 15, 112) 448 conv2d_60[0][0]
____________________________________________________________________________________________________
batch_normalization_48 (BatchNor (None, 15, 15, 48) 192 conv2d_61[0][0]
____________________________________________________________________________________________________
batch_normalization_49 (BatchNor (None, 15, 15, 32) 128 conv2d_62[0][0]
____________________________________________________________________________________________________
max_pooling2d_16 (MaxPooling2D) (None, 15, 15, 208) 0 concatenate_18[0][0]
____________________________________________________________________________________________________
activation_41 (Activation) (None, 15, 15, 112) 0 batch_normalization_41[0][0]
____________________________________________________________________________________________________
activation_42 (Activation) (None, 15, 15, 48) 0 batch_normalization_42[0][0]
____________________________________________________________________________________________________
activation_43 (Activation) (None, 15, 15, 32) 0 batch_normalization_43[0][0]
____________________________________________________________________________________________________
conv2d_55 (Conv2D) (None, 15, 15, 48) 10032 max_pooling2d_14[0][0]
____________________________________________________________________________________________________
activation_47 (Activation) (None, 15, 15, 112) 0 batch_normalization_47[0][0]
____________________________________________________________________________________________________
activation_48 (Activation) (None, 15, 15, 48) 0 batch_normalization_48[0][0]
____________________________________________________________________________________________________
activation_49 (Activation) (None, 15, 15, 32) 0 batch_normalization_49[0][0]
____________________________________________________________________________________________________
conv2d_63 (Conv2D) (None, 15, 15, 48) 10032 max_pooling2d_16[0][0]
____________________________________________________________________________________________________
concatenate_17 (Concatenate) (None, 15, 15, 240) 0 activation_41[0][0]
activation_42[0][0]
activation_43[0][0]
conv2d_55[0][0]
____________________________________________________________________________________________________
concatenate_19 (Concatenate) (None, 15, 15, 240) 0 activation_47[0][0]
activation_48[0][0]
activation_49[0][0]
conv2d_63[0][0]
____________________________________________________________________________________________________
conv2d_64 (Conv2D) (None, 7, 7, 96) 207456 concatenate_17[0][0]
____________________________________________________________________________________________________
conv2d_65 (Conv2D) (None, 7, 7, 96) 207456 concatenate_19[0][0]
____________________________________________________________________________________________________
batch_normalization_50 (BatchNor (None, 7, 7, 96) 384 conv2d_64[0][0]
____________________________________________________________________________________________________
batch_normalization_51 (BatchNor (None, 7, 7, 96) 384 conv2d_65[0][0]
____________________________________________________________________________________________________
activation_50 (Activation) (None, 7, 7, 96) 0 batch_normalization_50[0][0]
____________________________________________________________________________________________________
max_pooling2d_17 (MaxPooling2D) (None, 7, 7, 240) 0 concatenate_17[0][0]
____________________________________________________________________________________________________
activation_51 (Activation) (None, 7, 7, 96) 0 batch_normalization_51[0][0]
____________________________________________________________________________________________________
max_pooling2d_18 (MaxPooling2D) (None, 7, 7, 240) 0 concatenate_19[0][0]
____________________________________________________________________________________________________
concatenate_20 (Concatenate) (None, 7, 7, 336) 0 activation_50[0][0]
max_pooling2d_17[0][0]
____________________________________________________________________________________________________
concatenate_21 (Concatenate) (None, 7, 7, 336) 0 activation_51[0][0]
max_pooling2d_18[0][0]
____________________________________________________________________________________________________
concatenate_22 (Concatenate) (None, 7, 7, 672) 0 concatenate_20[0][0]
concatenate_21[0][0]
____________________________________________________________________________________________________
conv2d_66 (Conv2D) (None, 7, 7, 504) 3048696 concatenate_22[0][0]
____________________________________________________________________________________________________
batch_normalization_52 (BatchNor (None, 7, 7, 504) 2016 conv2d_66[0][0]
____________________________________________________________________________________________________
activation_52 (Activation) (None, 7, 7, 504) 0 batch_normalization_52[0][0]
____________________________________________________________________________________________________
conv2d_67 (Conv2D) (None, 7, 7, 336) 169680 activation_52[0][0]
____________________________________________________________________________________________________
batch_normalization_53 (BatchNor (None, 7, 7, 336) 1344 conv2d_67[0][0]
____________________________________________________________________________________________________
activation_53 (Activation) (None, 7, 7, 336) 0 batch_normalization_53[0][0]
____________________________________________________________________________________________________
conv2d_68 (Conv2D) (None, 7, 7, 176) 59312 activation_53[0][0]
____________________________________________________________________________________________________
conv2d_69 (Conv2D) (None, 7, 7, 160) 484000 activation_53[0][0]
____________________________________________________________________________________________________
conv2d_70 (Conv2D) (None, 7, 7, 96) 806496 activation_53[0][0]
____________________________________________________________________________________________________
conv2d_76 (Conv2D) (None, 7, 7, 176) 59312 activation_53[0][0]
____________________________________________________________________________________________________
conv2d_77 (Conv2D) (None, 7, 7, 160) 484000 activation_53[0][0]
____________________________________________________________________________________________________
conv2d_78 (Conv2D) (None, 7, 7, 96) 806496 activation_53[0][0]
____________________________________________________________________________________________________
batch_normalization_54 (BatchNor (None, 7, 7, 176) 704 conv2d_68[0][0]
____________________________________________________________________________________________________
batch_normalization_55 (BatchNor (None, 7, 7, 160) 640 conv2d_69[0][0]
____________________________________________________________________________________________________
batch_normalization_56 (BatchNor (None, 7, 7, 96) 384 conv2d_70[0][0]
____________________________________________________________________________________________________
max_pooling2d_19 (MaxPooling2D) (None, 7, 7, 336) 0 activation_53[0][0]
____________________________________________________________________________________________________
batch_normalization_60 (BatchNor (None, 7, 7, 176) 704 conv2d_76[0][0]
____________________________________________________________________________________________________
batch_normalization_61 (BatchNor (None, 7, 7, 160) 640 conv2d_77[0][0]
____________________________________________________________________________________________________
batch_normalization_62 (BatchNor (None, 7, 7, 96) 384 conv2d_78[0][0]
____________________________________________________________________________________________________
max_pooling2d_21 (MaxPooling2D) (None, 7, 7, 336) 0 activation_53[0][0]
____________________________________________________________________________________________________
activation_54 (Activation) (None, 7, 7, 176) 0 batch_normalization_54[0][0]
____________________________________________________________________________________________________
activation_55 (Activation) (None, 7, 7, 160) 0 batch_normalization_55[0][0]
____________________________________________________________________________________________________
activation_56 (Activation) (None, 7, 7, 96) 0 batch_normalization_56[0][0]
____________________________________________________________________________________________________
conv2d_71 (Conv2D) (None, 7, 7, 96) 32352 max_pooling2d_19[0][0]
____________________________________________________________________________________________________
activation_60 (Activation) (None, 7, 7, 176) 0 batch_normalization_60[0][0]
____________________________________________________________________________________________________
activation_61 (Activation) (None, 7, 7, 160) 0 batch_normalization_61[0][0]
____________________________________________________________________________________________________
activation_62 (Activation) (None, 7, 7, 96) 0 batch_normalization_62[0][0]
____________________________________________________________________________________________________
conv2d_79 (Conv2D) (None, 7, 7, 96) 32352 max_pooling2d_21[0][0]
____________________________________________________________________________________________________
concatenate_23 (Concatenate) (None, 7, 7, 528) 0 activation_54[0][0]
activation_55[0][0]
activation_56[0][0]
conv2d_71[0][0]
____________________________________________________________________________________________________
concatenate_25 (Concatenate) (None, 7, 7, 528) 0 activation_60[0][0]
activation_61[0][0]
activation_62[0][0]
conv2d_79[0][0]
____________________________________________________________________________________________________
conv2d_72 (Conv2D) (None, 7, 7, 176) 93104 concatenate_23[0][0]
____________________________________________________________________________________________________
conv2d_73 (Conv2D) (None, 7, 7, 160) 760480 concatenate_23[0][0]
____________________________________________________________________________________________________
conv2d_74 (Conv2D) (None, 7, 7, 96) 1267296 concatenate_23[0][0]
____________________________________________________________________________________________________
conv2d_80 (Conv2D) (None, 7, 7, 176) 93104 concatenate_25[0][0]
____________________________________________________________________________________________________
conv2d_81 (Conv2D) (None, 7, 7, 160) 760480 concatenate_25[0][0]
____________________________________________________________________________________________________
conv2d_82 (Conv2D) (None, 7, 7, 96) 1267296 concatenate_25[0][0]
____________________________________________________________________________________________________
batch_normalization_57 (BatchNor (None, 7, 7, 176) 704 conv2d_72[0][0]
____________________________________________________________________________________________________
batch_normalization_58 (BatchNor (None, 7, 7, 160) 640 conv2d_73[0][0]
____________________________________________________________________________________________________
batch_normalization_59 (BatchNor (None, 7, 7, 96) 384 conv2d_74[0][0]
____________________________________________________________________________________________________
max_pooling2d_20 (MaxPooling2D) (None, 7, 7, 528) 0 concatenate_23[0][0]
____________________________________________________________________________________________________
batch_normalization_63 (BatchNor (None, 7, 7, 176) 704 conv2d_80[0][0]
____________________________________________________________________________________________________
batch_normalization_64 (BatchNor (None, 7, 7, 160) 640 conv2d_81[0][0]
____________________________________________________________________________________________________
batch_normalization_65 (BatchNor (None, 7, 7, 96) 384 conv2d_82[0][0]
____________________________________________________________________________________________________
max_pooling2d_22 (MaxPooling2D) (None, 7, 7, 528) 0 concatenate_25[0][0]
____________________________________________________________________________________________________
activation_57 (Activation) (None, 7, 7, 176) 0 batch_normalization_57[0][0]
____________________________________________________________________________________________________
activation_58 (Activation) (None, 7, 7, 160) 0 batch_normalization_58[0][0]
____________________________________________________________________________________________________
activation_59 (Activation) (None, 7, 7, 96) 0 batch_normalization_59[0][0]
____________________________________________________________________________________________________
conv2d_75 (Conv2D) (None, 7, 7, 96) 50784 max_pooling2d_20[0][0]
____________________________________________________________________________________________________
activation_63 (Activation) (None, 7, 7, 176) 0 batch_normalization_63[0][0]
____________________________________________________________________________________________________
activation_64 (Activation) (None, 7, 7, 160) 0 batch_normalization_64[0][0]
____________________________________________________________________________________________________
activation_65 (Activation) (None, 7, 7, 96) 0 batch_normalization_65[0][0]
____________________________________________________________________________________________________
conv2d_83 (Conv2D) (None, 7, 7, 96) 50784 max_pooling2d_22[0][0]
____________________________________________________________________________________________________
concatenate_24 (Concatenate) (None, 7, 7, 528) 0 activation_57[0][0]
activation_58[0][0]
activation_59[0][0]
conv2d_75[0][0]
____________________________________________________________________________________________________
concatenate_26 (Concatenate) (None, 7, 7, 528) 0 activation_63[0][0]
activation_64[0][0]
activation_65[0][0]
conv2d_83[0][0]
____________________________________________________________________________________________________
concatenate_27 (Concatenate) (None, 7, 7, 1056) 0 concatenate_24[0][0]
concatenate_26[0][0]
____________________________________________________________________________________________________
average_pooling2d (AveragePoolin (None, 1, 1, 1056) 0 concatenate_27[0][0]
____________________________________________________________________________________________________
dropout (Dropout) (None, 1, 1, 1056) 0 average_pooling2d[0][0]
____________________________________________________________________________________________________
flatten (Flatten) (None, 1056) 0 dropout[0][0]
____________________________________________________________________________________________________
dense (Dense) (None, 10) 10570 flatten[0][0]
____________________________________________________________________________________________________
activation_66 (Activation) (None, 10) 0 dense[0][0]
====================================================================================================
Total params: 20,452,802
Trainable params: 20,439,154
Non-trainable params: 13,648
____________________________________________________________________________________________________
# Train on the augmented training generator, validating against the
# augmented validation generator each epoch.
# NOTE(review): 2501 looks like the size of the validation holdout carved
# out of X_train, and 64 the generator batch size — confirm both against
# the data-split / ImageDataGenerator cells above.
train_steps = (len(X_train) - 2501) // 64
history = model.fit(
    aug_train,
    validation_data=aug_vali,
    steps_per_epoch=train_steps,
    epochs=n_epochs,
    callbacks=callback,
    verbose=1,
)
2022-02-24 10:24:41.409065: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2) 2022-02-24 10:24:41.430040: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3699850000 Hz
Epoch 1/50
2022-02-24 10:24:45.532749: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10 2022-02-24 10:24:45.730433: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7 2022-02-24 10:24:46.509530: W tensorflow/stream_executor/gpu/asm_compiler.cc:63] Running ptxas --version returned 256 2022-02-24 10:24:46.531076: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: ptxas exited with non-zero error code 256, output: Relying on driver to perform ptx compilation. Modify $PATH to customize ptxas location. This message will be only logged once.
742/742 [==============================] - 192s 212ms/step - loss: 1.6961 - accuracy: 0.3924 - val_loss: 1.1870 - val_accuracy: 0.5768 Epoch 2/50 742/742 [==============================] - 115s 155ms/step - loss: 0.8828 - accuracy: 0.6900 - val_loss: 0.8865 - val_accuracy: 0.6972 Epoch 3/50 742/742 [==============================] - 115s 155ms/step - loss: 0.6630 - accuracy: 0.7690 - val_loss: 0.7526 - val_accuracy: 0.7440 Epoch 4/50 742/742 [==============================] - 115s 155ms/step - loss: 0.5421 - accuracy: 0.8137 - val_loss: 0.6146 - val_accuracy: 0.7812 Epoch 5/50 742/742 [==============================] - 115s 155ms/step - loss: 0.4620 - accuracy: 0.8400 - val_loss: 0.5494 - val_accuracy: 0.8084 Epoch 6/50 742/742 [==============================] - 115s 155ms/step - loss: 0.4063 - accuracy: 0.8610 - val_loss: 0.5299 - val_accuracy: 0.8216 Epoch 7/50 742/742 [==============================] - 115s 155ms/step - loss: 0.3539 - accuracy: 0.8783 - val_loss: 0.5841 - val_accuracy: 0.8032 Epoch 8/50 742/742 [==============================] - 115s 155ms/step - loss: 0.3171 - accuracy: 0.8907 - val_loss: 0.5092 - val_accuracy: 0.8260 Epoch 9/50 742/742 [==============================] - 115s 155ms/step - loss: 0.2737 - accuracy: 0.9050 - val_loss: 0.4394 - val_accuracy: 0.8516 Epoch 10/50 742/742 [==============================] - 115s 155ms/step - loss: 0.2487 - accuracy: 0.9148 - val_loss: 0.4782 - val_accuracy: 0.8420 Epoch 11/50 742/742 [==============================] - 115s 155ms/step - loss: 0.2122 - accuracy: 0.9266 - val_loss: 0.4016 - val_accuracy: 0.8576 Epoch 12/50 742/742 [==============================] - 115s 155ms/step - loss: 0.1927 - accuracy: 0.9329 - val_loss: 0.4208 - val_accuracy: 0.8708 Epoch 13/50 742/742 [==============================] - 115s 155ms/step - loss: 0.1674 - accuracy: 0.9439 - val_loss: 0.3949 - val_accuracy: 0.8740 Epoch 14/50 742/742 [==============================] - 115s 155ms/step - loss: 0.1533 - accuracy: 0.9462 - 
val_loss: 0.4345 - val_accuracy: 0.8596 Epoch 15/50 742/742 [==============================] - 115s 155ms/step - loss: 0.1383 - accuracy: 0.9510 - val_loss: 0.3851 - val_accuracy: 0.8708 Epoch 16/50 742/742 [==============================] - 115s 155ms/step - loss: 0.1163 - accuracy: 0.9600 - val_loss: 0.4153 - val_accuracy: 0.8800 Epoch 17/50 742/742 [==============================] - 115s 155ms/step - loss: 0.1141 - accuracy: 0.9598 - val_loss: 0.4051 - val_accuracy: 0.8740 Epoch 18/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0979 - accuracy: 0.9656 - val_loss: 0.4260 - val_accuracy: 0.8692 Epoch 19/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0909 - accuracy: 0.9688 - val_loss: 0.3459 - val_accuracy: 0.8932 Epoch 20/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0748 - accuracy: 0.9751 - val_loss: 0.4236 - val_accuracy: 0.8780 Epoch 21/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0659 - accuracy: 0.9771 - val_loss: 0.4141 - val_accuracy: 0.8892 Epoch 22/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0610 - accuracy: 0.9788 - val_loss: 0.4272 - val_accuracy: 0.8776 Epoch 23/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0587 - accuracy: 0.9795 - val_loss: 0.4458 - val_accuracy: 0.8908 Epoch 24/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0486 - accuracy: 0.9839 - val_loss: 0.4206 - val_accuracy: 0.8884 Epoch 25/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0430 - accuracy: 0.9851 - val_loss: 0.3669 - val_accuracy: 0.8984 Epoch 26/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0373 - accuracy: 0.9882 - val_loss: 0.3678 - val_accuracy: 0.8976 Epoch 27/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0314 - accuracy: 0.9885 - val_loss: 0.3424 - val_accuracy: 0.9056 Epoch 28/50 742/742 
[==============================] - 115s 156ms/step - loss: 0.0282 - accuracy: 0.9903 - val_loss: 0.3598 - val_accuracy: 0.9088 Epoch 29/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0238 - accuracy: 0.9922 - val_loss: 0.3699 - val_accuracy: 0.8996 Epoch 30/50 742/742 [==============================] - 116s 156ms/step - loss: 0.0258 - accuracy: 0.9916 - val_loss: 0.3682 - val_accuracy: 0.9056 Epoch 31/50 742/742 [==============================] - 116s 156ms/step - loss: 0.0192 - accuracy: 0.9939 - val_loss: 0.3354 - val_accuracy: 0.9112 Epoch 32/50 742/742 [==============================] - 115s 155ms/step - loss: 0.0158 - accuracy: 0.9953 - val_loss: 0.4366 - val_accuracy: 0.8936 Epoch 33/50 742/742 [==============================] - 119s 161ms/step - loss: 0.0174 - accuracy: 0.9946 - val_loss: 0.3605 - val_accuracy: 0.9088 Epoch 34/50 742/742 [==============================] - 119s 160ms/step - loss: 0.0109 - accuracy: 0.9968 - val_loss: 0.3226 - val_accuracy: 0.9116 Epoch 35/50 742/742 [==============================] - 119s 160ms/step - loss: 0.0114 - accuracy: 0.9965 - val_loss: 0.3606 - val_accuracy: 0.9140 Epoch 36/50 742/742 [==============================] - 120s 162ms/step - loss: 0.0095 - accuracy: 0.9972 - val_loss: 0.3724 - val_accuracy: 0.9108 Epoch 37/50 742/742 [==============================] - 119s 161ms/step - loss: 0.0098 - accuracy: 0.9972 - val_loss: 0.3589 - val_accuracy: 0.9120 Epoch 38/50 742/742 [==============================] - 120s 162ms/step - loss: 0.0083 - accuracy: 0.9973 - val_loss: 0.3213 - val_accuracy: 0.9196 Epoch 39/50 742/742 [==============================] - 120s 162ms/step - loss: 0.0075 - accuracy: 0.9979 - val_loss: 0.3219 - val_accuracy: 0.9244 Epoch 40/50 742/742 [==============================] - 120s 161ms/step - loss: 0.0062 - accuracy: 0.9982 - val_loss: 0.3311 - val_accuracy: 0.9212 Epoch 41/50 742/742 [==============================] - 120s 162ms/step - loss: 0.0049 - accuracy: 0.9988 - 
val_loss: 0.3280 - val_accuracy: 0.9236 Epoch 42/50 742/742 [==============================] - 120s 161ms/step - loss: 0.0053 - accuracy: 0.9987 - val_loss: 0.3426 - val_accuracy: 0.9196 Epoch 43/50 742/742 [==============================] - 120s 162ms/step - loss: 0.0043 - accuracy: 0.9990 - val_loss: 0.3533 - val_accuracy: 0.9128 Epoch 44/50 742/742 [==============================] - 120s 161ms/step - loss: 0.0038 - accuracy: 0.9994 - val_loss: 0.2994 - val_accuracy: 0.9300 Epoch 45/50 742/742 [==============================] - 120s 162ms/step - loss: 0.0034 - accuracy: 0.9994 - val_loss: 0.3304 - val_accuracy: 0.9192 Epoch 46/50 742/742 [==============================] - 120s 162ms/step - loss: 0.0032 - accuracy: 0.9995 - val_loss: 0.3679 - val_accuracy: 0.9196 Epoch 47/50 742/742 [==============================] - 119s 161ms/step - loss: 0.0030 - accuracy: 0.9994 - val_loss: 0.3290 - val_accuracy: 0.9188 Epoch 48/50 742/742 [==============================] - 120s 161ms/step - loss: 0.0030 - accuracy: 0.9994 - val_loss: 0.3424 - val_accuracy: 0.9240 Epoch 49/50 742/742 [==============================] - 121s 162ms/step - loss: 0.0033 - accuracy: 0.9993 - val_loss: 0.3490 - val_accuracy: 0.9168 Epoch 50/50 742/742 [==============================] - 120s 162ms/step - loss: 0.0034 - accuracy: 0.9993 - val_loss: 0.3243 - val_accuracy: 0.9196
# Evaluate and visualize results; presumably plots training curves from
# `history` and test-set metrics for `model` — see the project-local
# PlotModelEval definition for details (not visible here).
PlotModelEval(model, history, X_test, y_test, cifar_labels)
Don't forget to save your model!
# Persist the trained model in HDF5 format.
# NOTE(review): "CompetionModel" is likely a typo for "CompetitionModel",
# but the filename is kept byte-identical — anything that loads this file
# elsewhere depends on the exact name, so fix both sides together or not at all.
model.save(f"CompetionModel_{GroupName}.h5")